From bf37da98c51825c90432d340e135cced37a7460d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 12 Mar 2020 16:55:07 -0700
Subject: rcu: Don't acquire lock in NMI handler in rcu_nmi_enter_common()

The rcu_nmi_enter_common() function can be invoked both in interrupt
and NMI handlers.  If it is invoked from process context (as opposed
to userspace or idle context) on a nohz_full CPU, it might acquire the
CPU's leaf rcu_node structure's ->lock.  Because this lock is held only
with interrupts disabled, this is safe from an interrupt handler, but
doing so from an NMI handler can result in self-deadlock.

This commit therefore adds "irq" to the "if" condition so as to only
acquire the ->lock from irq handlers or process context, never from
an NMI handler.

Fixes: 5b14557b073c ("rcu: Avoid tick_dep_set_cpu() misordering")
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: <stable@vger.kernel.org> # 5.5.x
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 550193a9ce76..2c17859233db 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -825,7 +825,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
 			rcu_cleanup_after_idle();
 
 		incby = 1;
-	} else if (tick_nohz_full_cpu(rdp->cpu) &&
+	} else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
 		   rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
 		   READ_ONCE(rdp->rcu_urgent_qs) &&
 		   !READ_ONCE(rdp->rcu_forced_tick)) {
-- 
cgit 


From eaec2b0bd30690575c581eebffae64bfb7f684ac Mon Sep 17 00:00:00 2001
From: Zhiqiang Liu <liuzhiqiang26@huawei.com>
Date: Mon, 30 Mar 2020 10:18:33 +0800
Subject: signal: check sig before setting info in kill_pid_usb_asyncio

In kill_pid_usb_asyncio, if signal is not valid, we do not need to
set info struct.

Signed-off-by: Zhiqiang Liu <liuzhiqiang26@huawei.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/f525fd08-1cf7-fb09-d20c-4359145eb940@huawei.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/signal.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index e58a6c619824..3f94894d1253 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1510,15 +1510,15 @@ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr,
 	unsigned long flags;
 	int ret = -EINVAL;
 
+	if (!valid_signal(sig))
+		return ret;
+
 	clear_siginfo(&info);
 	info.si_signo = sig;
 	info.si_errno = errno;
 	info.si_code = SI_ASYNCIO;
 	*((sigval_t *)&info.si_pid) = addr;
 
-	if (!valid_signal(sig))
-		return ret;
-
 	rcu_read_lock();
 	p = pid_task(pid, PIDTYPE_PID);
 	if (!p) {
-- 
cgit 


From 3075afdf15b89a063f8d31c0db08a50472bb7faf Mon Sep 17 00:00:00 2001
From: Zhiqiang Liu <liuzhiqiang26@huawei.com>
Date: Mon, 30 Mar 2020 10:44:43 +0800
Subject: signal: use kill_proc_info instead of kill_pid_info in
 kill_something_info

signal.c provides kill_proc_info, we can use it instead of kill_pid_info
in kill_something_info func gracefully.

Signed-off-by: Zhiqiang Liu <liuzhiqiang26@huawei.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/80236965-f0b5-c888-95ff-855bdec75bb3@huawei.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/signal.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 3f94894d1253..713104884414 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1557,12 +1557,8 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
 	int ret;
 
-	if (pid > 0) {
-		rcu_read_lock();
-		ret = kill_pid_info(sig, info, find_vpid(pid));
-		rcu_read_unlock();
-		return ret;
-	}
+	if (pid > 0)
+		return kill_proc_info(sig, info, pid);
 
 	/* -INT_MIN is undefined.  Exclude this case to avoid a UBSAN warning */
 	if (pid == INT_MIN)
-- 
cgit 


From 07d8350ede4c4c29634b26c163a1eecdf39dfcfb Mon Sep 17 00:00:00 2001
From: afzal mohammed <afzal.mohd.ma@gmail.com>
Date: Fri, 27 Mar 2020 21:41:16 +0530
Subject: genirq: Remove setup_irq() and remove_irq()

Now that all the users of setup_irq() & remove_irq() have been replaced by
request_irq() & free_irq() respectively, delete them.

Signed-off-by: afzal mohammed <afzal.mohd.ma@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lkml.kernel.org/r/0aa8771ada1ac8e1312f6882980c9c08bd023148.1585320721.git.afzal.mohd.ma@gmail.com
---
 kernel/irq/manage.c | 44 --------------------------------------------
 1 file changed, 44 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index fe40c658f86f..453a8a0f4804 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1690,34 +1690,6 @@ out_mput:
 	return ret;
 }
 
-/**
- *	setup_irq - setup an interrupt
- *	@irq: Interrupt line to setup
- *	@act: irqaction for the interrupt
- *
- * Used to statically setup interrupts in the early boot process.
- */
-int setup_irq(unsigned int irq, struct irqaction *act)
-{
-	int retval;
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc)))
-		return -EINVAL;
-
-	retval = irq_chip_pm_get(&desc->irq_data);
-	if (retval < 0)
-		return retval;
-
-	retval = __setup_irq(irq, desc, act);
-
-	if (retval)
-		irq_chip_pm_put(&desc->irq_data);
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(setup_irq);
-
 /*
  * Internal function to unregister an irqaction - used to free
  * regular and special interrupts that are part of the architecture.
@@ -1858,22 +1830,6 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
 	return action;
 }
 
-/**
- *	remove_irq - free an interrupt
- *	@irq: Interrupt line to free
- *	@act: irqaction for the interrupt
- *
- * Used to remove interrupts statically setup by the early boot process.
- */
-void remove_irq(unsigned int irq, struct irqaction *act)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc)))
-		__free_irq(desc, act->dev_id);
-}
-EXPORT_SYMBOL_GPL(remove_irq);
-
 /**
  *	free_irq - free an interrupt allocated with request_irq
  *	@irq: Interrupt line to free
-- 
cgit 


From e82a118f57b89bbb437ce70780fc2678d5c281e5 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr@redhat.com>
Date: Sun, 12 Apr 2020 22:25:33 +0200
Subject: clone3: fix cgroup argument sanity check

Checking that cgroup field value of struct clone_args is less than 0
is useless, as it is defined as unsigned 64-bit integer.  Moreover,
it doesn't catch the situations where its higher bits are lost during
the assignment to the cgroup field of the cgroup field of the internal
struct kernel_clone_args (where it is declared as signed 32-bit
integer), so it is still possible to pass garbage there.  A check
against INT_MAX solves both these issues.

Fixes: ef2c41cf38a7559b ("clone3: allow spawning processes into cgroups")
Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20200412202533.GA29554@asgard.redhat.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 4385f3d639f2..b4f7775623c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2631,7 +2631,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 		     !valid_signal(args.exit_signal)))
 		return -EINVAL;
 
-	if ((args.flags & CLONE_INTO_CGROUP) && args.cgroup < 0)
+	if ((args.flags & CLONE_INTO_CGROUP) && args.cgroup > INT_MAX)
 		return -EINVAL;
 
 	*kargs = (struct kernel_clone_args){
-- 
cgit 


From 62173872ca65767c586217dec0a32485da8a2f07 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr@redhat.com>
Date: Sun, 12 Apr 2020 22:31:23 +0200
Subject: clone3: add a check for the user struct size if CLONE_INTO_CGROUP is
 set

Passing CLONE_INTO_CGROUP with an under-sized structure (that doesn't
properly contain cgroup field) seems like garbage input, especially
considering the fact that fd 0 is a valid descriptor.

Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20200412203123.GA5869@asgard.redhat.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/fork.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index b4f7775623c8..3ab7cf88e455 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2631,7 +2631,8 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 		     !valid_signal(args.exit_signal)))
 		return -EINVAL;
 
-	if ((args.flags & CLONE_INTO_CGROUP) && args.cgroup > INT_MAX)
+	if ((args.flags & CLONE_INTO_CGROUP) &&
+	    (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2))
 		return -EINVAL;
 
 	*kargs = (struct kernel_clone_args){
-- 
cgit 


From a966dcfe153ab0a3d8d79cd971a079411a489be7 Mon Sep 17 00:00:00 2001
From: Eugene Syromiatnikov <esyr@redhat.com>
Date: Sun, 12 Apr 2020 22:26:58 +0200
Subject: clone3: add build-time CLONE_ARGS_SIZE_VER* validity checks

CLONE_ARGS_SIZE_VER* macros are defined explicitly and not via
the offsets of the relevant struct clone_args fields, which makes
it rather error-prone, so it probably makes sense to add some
compile-time checks for them (including the one that breaks
on struct clone_args extension as a reminder to add a relevant
size macro and a similar check).  Function copy_clone_args_from_user
seems to be a good place for such checks.

Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20200412202658.GA31499@asgard.redhat.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/fork.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 3ab7cf88e455..8c700f881d92 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2605,6 +2605,14 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 	struct clone_args args;
 	pid_t *kset_tid = kargs->set_tid;
 
+	BUILD_BUG_ON(offsetofend(struct clone_args, tls) !=
+		     CLONE_ARGS_SIZE_VER0);
+	BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) !=
+		     CLONE_ARGS_SIZE_VER1);
+	BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) !=
+		     CLONE_ARGS_SIZE_VER2);
+	BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
+
 	if (unlikely(usize > PAGE_SIZE))
 		return -E2BIG;
 	if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
-- 
cgit 


From 3662daf023500dc084fa3b96f68a6f46179ddc73 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 3 Apr 2020 18:35:17 -0400
Subject: sched/isolation: Allow "isolcpus=" to skip unknown sub-parameters

The "isolcpus=" parameter allows sub-parameters before the cpulist is
specified, and if the parser detects an unknown sub-parameters the whole
parameter will be ignored.

This design is incompatible with itself when new sub-parameters are added.
An older kernel will not recognize the new sub-parameter and will
invalidate the whole parameter so the CPU isolation will not take
effect. It emits a warning:

    isolcpus: Error, unknown flag

The better and compatible way is to allow "isolcpus=" to skip unknown
sub-parameters, so that even if new sub-parameters are added an older
kernel will still be able to behave as usual even if with the new
sub-parameter specified on the command line.

Ideally this should have been there when the first sub-parameter for
"isolcpus=" was introduced.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20200403223517.406353-1-peterx@redhat.com
---
 kernel/sched/isolation.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 008d6ac2342b..808244f3ddd9 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -149,6 +149,9 @@ __setup("nohz_full=", housekeeping_nohz_full_setup);
 static int __init housekeeping_isolcpus_setup(char *str)
 {
 	unsigned int flags = 0;
+	bool illegal = false;
+	char *par;
+	int len;
 
 	while (isalpha(*str)) {
 		if (!strncmp(str, "nohz,", 5)) {
@@ -169,8 +172,22 @@ static int __init housekeeping_isolcpus_setup(char *str)
 			continue;
 		}
 
-		pr_warn("isolcpus: Error, unknown flag\n");
-		return 0;
+		/*
+		 * Skip unknown sub-parameter and validate that it is not
+		 * containing an invalid character.
+		 */
+		for (par = str, len = 0; *str && *str != ','; str++, len++) {
+			if (!isalpha(*str) && *str != '_')
+				illegal = true;
+		}
+
+		if (illegal) {
+			pr_warn("isolcpus: Invalid flag %.*s\n", len, par);
+			return 0;
+		}
+
+		pr_info("isolcpus: Skipped unknown flag %.*s\n", len, par);
+		str++;
 	}
 
 	/* Default behaviour for isolcpus without flags */
-- 
cgit 


From e0d648f9d883ec1efab261af158d73aa30e9dd12 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 27 Mar 2020 22:43:34 +0100
Subject: sched/vtime: Work around an unitialized variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Work around this warning:

  kernel/sched/cputime.c: In function ‘kcpustat_field’:
  kernel/sched/cputime.c:1007:6: warning: ‘val’ may be used uninitialized in this function [-Wmaybe-uninitialized]

because GCC can't see that val is used only when err is 0.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200327214334.GF8015@zn.tnic
---
 kernel/sched/cputime.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index dac9104d126f..ff9435dee1df 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1003,12 +1003,12 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 		   enum cpu_usage_stat usage, int cpu)
 {
 	u64 *cpustat = kcpustat->cpustat;
+	u64 val = cpustat[usage];
 	struct rq *rq;
-	u64 val;
 	int err;
 
 	if (!vtime_accounting_enabled_cpu(cpu))
-		return cpustat[usage];
+		return val;
 
 	rq = cpu_rq(cpu);
 
-- 
cgit 


From 94d440d618467806009c8edc70b094d64e12ee5a Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Sat, 11 Apr 2020 08:40:31 -0700
Subject: proc, time/namespace: Show clock symbolic names in
 /proc/pid/timens_offsets

Michael Kerrisk suggested to replace numeric clock IDs with symbolic names.

Now the content of these files looks like this:
$ cat /proc/774/timens_offsets
monotonic      864000         0
boottime      1728000         0

For setting offsets, both representations of clocks (numeric and symbolic)
can be used.

As for compatibility, it is acceptable to change things as long as
userspace doesn't care. The format of timens_offsets files is very new and
there are no userspace tools yet which rely on this format.

But three projects crun, util-linux and criu rely on the interface of
setting time offsets and this is why it's required to continue supporting
the numeric clock IDs on write.

Fixes: 04a8682a71be ("fs/proc: Introduce /proc/pid/timens_offsets")
Suggested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20200411154031.642557-1-avagin@gmail.com
---
 kernel/time/namespace.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 3b30288793fe..53bce347cd50 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -338,7 +338,20 @@ static struct user_namespace *timens_owner(struct ns_common *ns)
 
 static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
 {
-	seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec);
+	char *clock;
+
+	switch (clockid) {
+	case CLOCK_BOOTTIME:
+		clock = "boottime";
+		break;
+	case CLOCK_MONOTONIC:
+		clock = "monotonic";
+		break;
+	default:
+		clock = "unknown";
+		break;
+	}
+	seq_printf(m, "%-10s %10lld %9ld\n", clock, ts->tv_sec, ts->tv_nsec);
 }
 
 void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
-- 
cgit 


From 763dafc520add02a1f4639b500c509acc0ea8e5b Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Mon, 20 Apr 2020 16:24:34 -0400
Subject: audit: check the length of userspace generated audit records

Commit 756125289285 ("audit: always check the netlink payload length
in audit_receive_msg()") fixed a number of missing message length
checks, but forgot to check the length of userspace generated audit
records.  The good news is that you need CAP_AUDIT_WRITE to submit
userspace audit records, which is generally only given to trusted
processes, so the impact should be limited.

Cc: stable@vger.kernel.org
Fixes: 756125289285 ("audit: always check the netlink payload length in audit_receive_msg()")
Reported-by: syzbot+49e69b4d71a420ceda3e@syzkaller.appspotmail.com
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index b69c8b460341..87f31bf1f0a0 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1326,6 +1326,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
 		if (!audit_enabled && msg_type != AUDIT_USER_AVC)
 			return 0;
+		/* exit early if there isn't at least one character to print */
+		if (data_len < 2)
+			return -EINVAL;
 
 		err = audit_filter(msg_type, AUDIT_FILTER_USER);
 		if (err == 1) { /* match or error */
-- 
cgit 


From bc23d0e3f717ced21fbfacab3ab887d55e5ba367 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Thu, 16 Apr 2020 10:31:20 +0200
Subject: cpumap: Avoid warning when CONFIG_DEBUG_PER_CPU_MAPS is enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the kernel is built with CONFIG_DEBUG_PER_CPU_MAPS, the cpumap code
can trigger a spurious warning if CONFIG_CPUMASK_OFFSTACK is also set. This
happens because in this configuration, NR_CPUS can be larger than
nr_cpumask_bits, so the initial check in cpu_map_alloc() is not sufficient
to guard against hitting the warning in cpumask_check().

Fix this by explicitly checking the supplied key against the
nr_cpumask_bits variable before calling cpu_possible().

Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Xiumei Mu <xmu@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200416083120.453718-1-toke@redhat.com
---
 kernel/bpf/cpumap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 70f71b154fa5..3fe0b006d2d2 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -469,7 +469,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -EOVERFLOW;
 
 	/* Make sure CPU is a valid possible cpu */
-	if (!cpu_possible(key_cpu))
+	if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu))
 		return -ENODEV;
 
 	if (qsize == 0) {
-- 
cgit 


From 6e7e63cbb023976d828cdb22422606bf77baa8a9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 17 Apr 2020 02:00:06 +0200
Subject: bpf: Forbid XADD on spilled pointers for unprivileged users

When check_xadd() verifies an XADD operation on a pointer to a stack slot
containing a spilled pointer, check_stack_read() verifies that the read,
which is part of XADD, is valid. However, since the placeholder value -1 is
passed as `value_regno`, check_stack_read() can only return a binary
decision and can't return the type of the value that was read. The intent
here is to verify whether the value read from the stack slot may be used as
a SCALAR_VALUE; but since check_stack_read() doesn't check the type, and
the type information is lost when check_stack_read() returns, this is not
enforced, and a malicious user can abuse XADD to leak spilled kernel
pointers.

Fix it by letting check_stack_read() verify that the value is usable as a
SCALAR_VALUE if no type information is passed to the caller.

To be able to use __is_pointer_value() in check_stack_read(), move it up.

Fix up the expected unprivileged error message for a BPF selftest that,
until now, assumed that unprivileged users can use XADD on stack-spilled
pointers. This also gives us a test for the behavior introduced in this
patch for free.

In theory, this could also be fixed by forbidding XADD on stack spills
entirely, since XADD is a locked operation (for operations on memory with
concurrency) and there can't be any concurrency on the BPF stack; but
Alexei has said that he wants to keep XADD on stack slots working to avoid
changes to the test suite [1].

The following BPF program demonstrates how to leak a BPF map pointer as an
unprivileged user using this bug:

    // r7 = map_pointer
    BPF_LD_MAP_FD(BPF_REG_7, small_map),
    // r8 = launder(map_pointer)
    BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8),
    BPF_MOV64_IMM(BPF_REG_1, 0),
    ((struct bpf_insn) {
      .code  = BPF_STX | BPF_DW | BPF_XADD,
      .dst_reg = BPF_REG_FP,
      .src_reg = BPF_REG_1,
      .off = -8
    }),
    BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8),

    // store r8 into map
    BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),
    BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4),
    BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0),
    BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
    BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
    BPF_EXIT_INSN(),
    BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),

    BPF_MOV64_IMM(BPF_REG_0, 0),
    BPF_EXIT_INSN()

[1] https://lore.kernel.org/bpf/20200416211116.qxqcza5vo2ddnkdq@ast-mbp.dhcp.thefacebook.com/

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200417000007.10734-1-jannh@google.com
---
 kernel/bpf/verifier.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 38cfcf701eeb..9e92d3d5ffd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg)
 	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
 }
 
+static bool __is_pointer_value(bool allow_ptr_leaks,
+			       const struct bpf_reg_state *reg)
+{
+	if (allow_ptr_leaks)
+		return false;
+
+	return reg->type != SCALAR_VALUE;
+}
+
 static void save_register_state(struct bpf_func_state *state,
 				int spi, struct bpf_reg_state *reg)
 {
@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
 			 * which resets stack/reg liveness for state transitions
 			 */
 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+			/* If value_regno==-1, the caller is asking us whether
+			 * it is acceptable to use this value as a SCALAR_VALUE
+			 * (e.g. for XADD).
+			 * We must not allow unprivileged callers to do that
+			 * with spilled pointers.
+			 */
+			verbose(env, "leaking pointer from stack off %d\n",
+				off);
+			return -EACCES;
 		}
 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 	} else {
@@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 	return -EACCES;
 }
 
-static bool __is_pointer_value(bool allow_ptr_leaks,
-			       const struct bpf_reg_state *reg)
-{
-	if (allow_ptr_leaks)
-		return false;
-
-	return reg->type != SCALAR_VALUE;
-}
-
 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
 {
 	return cur_regs(env) + regno;
-- 
cgit 


From 8ff3571f7e1bf3f293cc5e3dc14f2943f4fa7fcf Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 17 Apr 2020 02:00:07 +0200
Subject: bpf: Fix handling of XADD on BTF memory

check_xadd() can cause check_ptr_to_btf_access() to be executed with
atype==BPF_READ and value_regno==-1 (meaning "just check whether the access
is okay, don't tell me what type it will result in").
Handle that case properly and skip writing type information, instead of
indexing into the registers at index -1 and writing into out-of-bounds
memory.

Note that at least at the moment, you can't actually write through a BTF
pointer, so check_xadd() will reject the program after calling
check_ptr_to_btf_access with atype==BPF_WRITE; but that's after the
verifier has already corrupted memory.

This patch assumes that BTF pointers are not available in unprivileged
programs.

Fixes: 9e15db66136a ("bpf: Implement accurate raw_tp context access via BTF")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200417000007.10734-2-jannh@google.com
---
 kernel/bpf/verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e92d3d5ffd1..9382609147f5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3099,7 +3099,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 	if (ret < 0)
 		return ret;
 
-	if (atype == BPF_READ) {
+	if (atype == BPF_READ && value_regno >= 0) {
 		if (ret == SCALAR_VALUE) {
 			mark_reg_unknown(env, regs, value_regno);
 			return 0;
-- 
cgit 


From 61e713bdca3678e84815f2427f7a063fc353a1fc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 20 Apr 2020 11:41:50 -0500
Subject: signal: Avoid corrupting si_pid and si_uid in do_notify_parent

Christof Meerwald <cmeerw@cmeerw.org> writes:
> Hi,
>
> this is probably related to commit
> 7a0cf094944e2540758b7f957eb6846d5126f535 (signal: Correct namespace
> fixups of si_pid and si_uid).
>
> With a 5.6.5 kernel I am seeing SIGCHLD signals that don't include a
> properly set si_pid field - this seems to happen for multi-threaded
> child processes.
>
> A simple test program (based on the sample from the signalfd man page):
>
> #include <sys/signalfd.h>
> #include <signal.h>
> #include <unistd.h>
> #include <spawn.h>
> #include <stdlib.h>
> #include <stdio.h>
>
> #define handle_error(msg) \
>     do { perror(msg); exit(EXIT_FAILURE); } while (0)
>
> int main(int argc, char *argv[])
> {
>   sigset_t mask;
>   int sfd;
>   struct signalfd_siginfo fdsi;
>   ssize_t s;
>
>   sigemptyset(&mask);
>   sigaddset(&mask, SIGCHLD);
>
>   if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
>     handle_error("sigprocmask");
>
>   pid_t chldpid;
>   char *chldargv[] = { "./sfdclient", NULL };
>   posix_spawn(&chldpid, "./sfdclient", NULL, NULL, chldargv, NULL);
>
>   sfd = signalfd(-1, &mask, 0);
>   if (sfd == -1)
>     handle_error("signalfd");
>
>   for (;;) {
>     s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
>     if (s != sizeof(struct signalfd_siginfo))
>       handle_error("read");
>
>     if (fdsi.ssi_signo == SIGCHLD) {
>       printf("Got SIGCHLD %d %d %d %d\n",
>           fdsi.ssi_status, fdsi.ssi_code,
>           fdsi.ssi_uid, fdsi.ssi_pid);
>       return 0;
>     } else {
>       printf("Read unexpected signal\n");
>     }
>   }
> }
>
>
> and a multi-threaded client to test with:
>
> #include <unistd.h>
> #include <pthread.h>
>
> void *f(void *arg)
> {
>   sleep(100);
> }
>
> int main()
> {
>   pthread_t t[8];
>
>   for (int i = 0; i != 8; ++i)
>   {
>     pthread_create(&t[i], NULL, f, NULL);
>   }
> }
>
> I tried to do a bit of debugging and what seems to be happening is
> that
>
>   /* From an ancestor pid namespace? */
>   if (!task_pid_nr_ns(current, task_active_pid_ns(t))) {
>
> fails inside task_pid_nr_ns because the check for "pid_alive" fails.
>
> This code seems to be called from do_notify_parent and there we
> actually have "tsk != current" (I am assuming both are threads of the
> current process?)

I instrumented the code with a warning and received the following backtrace:
> WARNING: CPU: 0 PID: 777 at kernel/pid.c:501 __task_pid_nr_ns.cold.6+0xc/0x15
> Modules linked in:
> CPU: 0 PID: 777 Comm: sfdclient Not tainted 5.7.0-rc1userns+ #2924
> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> RIP: 0010:__task_pid_nr_ns.cold.6+0xc/0x15
> Code: ff 66 90 48 83 ec 08 89 7c 24 04 48 8d 7e 08 48 8d 74 24 04 e8 9a b6 44 00 48 83 c4 08 c3 48 c7 c7 59 9f ac 82 e8 c2 c4 04 00 <0f> 0b e9 3fd
> RSP: 0018:ffffc9000042fbf8 EFLAGS: 00010046
> RAX: 000000000000000c RBX: 0000000000000000 RCX: ffffc9000042faf4
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff81193d29
> RBP: ffffc9000042fc18 R08: 0000000000000000 R09: 0000000000000001
> R10: 000000100f938416 R11: 0000000000000309 R12: ffff8880b941c140
> R13: 0000000000000000 R14: 0000000000000000 R15: ffff8880b941c140
> FS:  0000000000000000(0000) GS:ffff8880bca00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f2e8c0a32e0 CR3: 0000000002e10000 CR4: 00000000000006f0
> Call Trace:
>  send_signal+0x1c8/0x310
>  do_notify_parent+0x50f/0x550
>  release_task.part.21+0x4fd/0x620
>  do_exit+0x6f6/0xaf0
>  do_group_exit+0x42/0xb0
>  get_signal+0x13b/0xbb0
>  do_signal+0x2b/0x670
>  ? __audit_syscall_exit+0x24d/0x2b0
>  ? rcu_read_lock_sched_held+0x4d/0x60
>  ? kfree+0x24c/0x2b0
>  do_syscall_64+0x176/0x640
>  ? trace_hardirqs_off_thunk+0x1a/0x1c
>  entry_SYSCALL_64_after_hwframe+0x49/0xb3

The immediate problem is as Christof noticed that "pid_alive(current) == false".
This happens because do_notify_parent is called from the last thread to exit
in a process after that thread has been reaped.

The bigger issue is that do_notify_parent can be called from any
process that manages to wait on a thread of a multi-threaded process
from wait_task_zombie.  So any logic based upon current for
do_notify_parent is just nonsense, as current can be pretty much
anything.

So change do_notify_parent to call __send_signal directly.

Inspecting the code it appears this problem has existed since the pid
namespace support started handling this case in 2.6.30.  This fix only
backports to 7a0cf094944e ("signal: Correct namespace fixups of si_pid and si_uid")
where the problem logic was moved out of __send_signal and into send_signal.

Cc: stable@vger.kernel.org
Fixes: 6588c1e3ff01 ("signals: SI_USER: Masquerade si_pid when crossing pid ns boundary")
Ref: 921cf9f63089 ("signals: protect cinit from unblocked SIG_DFL signals")
Link: https://lore.kernel.org/lkml/20200419201336.GI22017@edge.cmeerw.net/
Reported-by: Christof Meerwald <cmeerw@cmeerw.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/signal.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index e58a6c619824..7938c60e11dd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1993,8 +1993,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 		if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
 			sig = 0;
 	}
+	/*
+	 * Send with __send_signal as si_pid and si_uid are in the
+	 * parent's namespaces.
+	 */
 	if (valid_signal(sig) && sig)
-		__group_send_sig_info(sig, &info, tsk->parent);
+		__send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false);
 	__wake_up_parent(tsk, tsk->parent);
 	spin_unlock_irqrestore(&psig->siglock, flags);
 
-- 
cgit 


From eaf5a92ebde5bca3bb2565616115bd6d579486cd Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 16 Apr 2020 09:59:56 +0100
Subject: sched/core: Fix reset-on-fork from RT with uclamp

uclamp_fork() resets the uclamp values to their default when the
reset-on-fork flag is set. It also checks whether the task has a RT
policy, and sets its uclamp.min to 1024 accordingly. However, during
reset-on-fork, the task's policy is lowered to SCHED_NORMAL right after,
hence leading to an erroneous uclamp.min setting for the new task if it
was forked from RT.

Fix this by removing the unnecessary check on rt_task() in
uclamp_fork() as this doesn't make sense if the reset-on-fork flag is
set.

Fixes: 1a00d999971c ("sched/uclamp: Set default clamps for RT tasks")
Reported-by: Chitti Babu Theegala <ctheegal@codeaurora.org>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Patrick Bellasi <patrick.bellasi@matbug.net>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lkml.kernel.org/r/20200416085956.217587-1-qperret@google.com
---
 kernel/sched/core.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3a61a3b8eaa9..9a2fbf98fd6f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1232,13 +1232,8 @@ static void uclamp_fork(struct task_struct *p)
 		return;
 
 	for_each_clamp_id(clamp_id) {
-		unsigned int clamp_value = uclamp_none(clamp_id);
-
-		/* By default, RT tasks always get 100% boost */
-		if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
-			clamp_value = uclamp_none(UCLAMP_MAX);
-
-		uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
+		uclamp_se_set(&p->uclamp_req[clamp_id],
+			      uclamp_none(clamp_id), false);
 	}
 }
 
-- 
cgit 


From f3bed55e850926614b9898fe982f66d2541a36a5 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Fri, 17 Apr 2020 11:28:42 -0700
Subject: perf/core: fix parent pid/tid in task exit events

Current logic yields the child task as the parent.

Before:
$ perf record bash -c "perf list > /dev/null"
$ perf script -D |grep 'FORK\|EXIT'
4387036190981094 0x5a70 [0x30]: PERF_RECORD_FORK(10472:10472):(10470:10470)
4387036606207580 0xf050 [0x30]: PERF_RECORD_EXIT(10472:10472):(10472:10472)
4387036607103839 0x17150 [0x30]: PERF_RECORD_EXIT(10470:10470):(10470:10470)
                                                   ^
  Note the repeated values here -------------------/

After:
383281514043 0x9d8 [0x30]: PERF_RECORD_FORK(2268:2268):(2266:2266)
383442003996 0x2180 [0x30]: PERF_RECORD_EXIT(2268:2268):(2266:2266)
383451297778 0xb70 [0x30]: PERF_RECORD_EXIT(2266:2266):(2265:2265)

Fixes: 94d5d1b2d891 ("perf_counter: Report the cloning task as parent on perf_counter_fork()")
Reported-by: KP Singh <kpsingh@google.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200417182842.12522-1-irogers@google.com
---
 kernel/events/core.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index bc9b98a9af9a..633b4ae72ed5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7491,10 +7491,17 @@ static void perf_event_task_output(struct perf_event *event,
 		goto out;
 
 	task_event->event_id.pid = perf_event_pid(event, task);
-	task_event->event_id.ppid = perf_event_pid(event, current);
-
 	task_event->event_id.tid = perf_event_tid(event, task);
-	task_event->event_id.ptid = perf_event_tid(event, current);
+
+	if (task_event->event_id.header.type == PERF_RECORD_EXIT) {
+		task_event->event_id.ppid = perf_event_pid(event,
+							task->real_parent);
+		task_event->event_id.ptid = perf_event_pid(event,
+							task->real_parent);
+	} else {  /* PERF_RECORD_FORK */
+		task_event->event_id.ppid = perf_event_pid(event, current);
+		task_event->event_id.ptid = perf_event_tid(event, current);
+	}
 
 	task_event->event_id.time = perf_event_clock(event);
 
-- 
cgit 


From 9da73974eb9c965dd9989befb593b8c8da9e4bdc Mon Sep 17 00:00:00 2001
From: Vamshi K Sthambamkadi <vamshi.k.sthambamkadi@gmail.com>
Date: Wed, 22 Apr 2020 11:45:06 +0530
Subject: tracing: Fix memory leaks in trace_events_hist.c

kmemleak report 1:
    [<9092c50b>] kmem_cache_alloc_trace+0x138/0x270
    [<05a2c9ed>] create_field_var+0xcf/0x180
    [<528a2d68>] action_create+0xe2/0xc80
    [<63f50b61>] event_hist_trigger_func+0x15b5/0x1920
    [<28ea5d3d>] trigger_process_regex+0x7b/0xc0
    [<3138e86f>] event_trigger_write+0x4d/0xb0
    [<ffd66c19>] __vfs_write+0x30/0x200
    [<4f424a0d>] vfs_write+0x96/0x1b0
    [<da59a290>] ksys_write+0x53/0xc0
    [<3717101a>] __ia32_sys_write+0x15/0x20
    [<c5f23497>] do_fast_syscall_32+0x70/0x250
    [<46e2629c>] entry_SYSENTER_32+0xaf/0x102

This is because save_vars[] of struct hist_trigger_data are
not destroyed

kmemleak report 2:
    [<9092c50b>] kmem_cache_alloc_trace+0x138/0x270
    [<6e5e97c5>] create_var+0x3c/0x110
    [<de82f1b9>] create_field_var+0xaf/0x180
    [<528a2d68>] action_create+0xe2/0xc80
    [<63f50b61>] event_hist_trigger_func+0x15b5/0x1920
    [<28ea5d3d>] trigger_process_regex+0x7b/0xc0
    [<3138e86f>] event_trigger_write+0x4d/0xb0
    [<ffd66c19>] __vfs_write+0x30/0x200
    [<4f424a0d>] vfs_write+0x96/0x1b0
    [<da59a290>] ksys_write+0x53/0xc0
    [<3717101a>] __ia32_sys_write+0x15/0x20
    [<c5f23497>] do_fast_syscall_32+0x70/0x250
    [<46e2629c>] entry_SYSENTER_32+0xaf/0x102

struct hist_field allocated through create_var() do not initialize
"ref" field to 1. The code in __destroy_hist_field() does not destroy
object if "ref" is initialized to zero, the condition
if (--hist_field->ref > 1) always passes since unsigned int wraps.

kmemleak report 3:
    [<f8666fcc>] __kmalloc_track_caller+0x139/0x2b0
    [<bb7f80a5>] kstrdup+0x27/0x50
    [<39d70006>] init_var_ref+0x58/0xd0
    [<8ca76370>] create_var_ref+0x89/0xe0
    [<f045fc39>] action_create+0x38f/0xc80
    [<7c146821>] event_hist_trigger_func+0x15b5/0x1920
    [<07de3f61>] trigger_process_regex+0x7b/0xc0
    [<e87daf8f>] event_trigger_write+0x4d/0xb0
    [<19bf1512>] __vfs_write+0x30/0x200
    [<64ce4d27>] vfs_write+0x96/0x1b0
    [<a6f34170>] ksys_write+0x53/0xc0
    [<7d4230cd>] __ia32_sys_write+0x15/0x20
    [<8eadca00>] do_fast_syscall_32+0x70/0x250
    [<235cf985>] entry_SYSENTER_32+0xaf/0x102

hist_fields (system & event_name) are not freed

Link: http://lkml.kernel.org/r/20200422061503.GA5151@cosmos

Signed-off-by: Vamshi K Sthambamkadi <vamshi.k.sthambamkadi@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel')

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 5f6834a2bf41..fcab11cc6833 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3320,6 +3320,9 @@ static void __destroy_hist_field(struct hist_field *hist_field)
 	kfree(hist_field->name);
 	kfree(hist_field->type);
 
+	kfree(hist_field->system);
+	kfree(hist_field->event_name);
+
 	kfree(hist_field);
 }
 
@@ -4382,6 +4385,7 @@ static struct hist_field *create_var(struct hist_trigger_data *hist_data,
 		goto out;
 	}
 
+	var->ref = 1;
 	var->flags = HIST_FIELD_FL_VAR;
 	var->var.idx = idx;
 	var->var.hist_data = var->hist_data = hist_data;
@@ -5011,6 +5015,9 @@ static void destroy_field_vars(struct hist_trigger_data *hist_data)
 
 	for (i = 0; i < hist_data->n_field_vars; i++)
 		destroy_field_var(hist_data->field_vars[i]);
+
+	for (i = 0; i < hist_data->n_save_vars; i++)
+		destroy_field_var(hist_data->save_vars[i]);
 }
 
 static void save_field_var(struct hist_trigger_data *hist_data,
-- 
cgit 


From 353da87921a5ec654e7e9024e083f099f1b33c97 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 22 Apr 2020 21:38:45 -0400
Subject: ftrace: Fix memory leak caused by not freeing entry in
 unregister_ftrace_direct()

kmemleak reported the following:

unreferenced object 0xffff90d47127a920 (size 32):
  comm "modprobe", pid 1766, jiffies 4294792031 (age 162.568s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 22 01 00 00 00 00 ad de  ........".......
    00 78 12 a7 ff ff ff ff 00 00 b6 c0 ff ff ff ff  .x..............
  backtrace:
    [<00000000bb79e72e>] register_ftrace_direct+0xcb/0x3a0
    [<00000000295e4f79>] do_one_initcall+0x72/0x340
    [<00000000873ead18>] do_init_module+0x5a/0x220
    [<00000000974d9de5>] load_module+0x2235/0x2550
    [<0000000059c3d6ce>] __do_sys_finit_module+0xc0/0x120
    [<000000005a8611b4>] do_syscall_64+0x60/0x230
    [<00000000a0cdc49e>] entry_SYSCALL_64_after_hwframe+0x49/0xb3

The entry used to save the direct descriptor needs to be freed
when unregistering.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 041694a1eb74..bd030b1b9514 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5165,6 +5165,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
 			list_del_rcu(&direct->next);
 			synchronize_rcu_tasks();
 			kfree(direct);
+			kfree(entry);
 			ftrace_direct_func_count--;
 		}
 	}
-- 
cgit 


From d013496f99c5608e0f80afd67acb1ba93c4144ea Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Apr 2020 15:33:12 +0800
Subject: tracing: Convert local functions in tracing_map.c to static

Fix the following sparse warning:

kernel/trace/tracing_map.c:286:6: warning: symbol
'tracing_map_array_clear' was not declared. Should it be static?
kernel/trace/tracing_map.c:297:6: warning: symbol
'tracing_map_array_free' was not declared. Should it be static?
kernel/trace/tracing_map.c:319:26: warning: symbol
'tracing_map_array_alloc' was not declared. Should it be static?

Link: http://lkml.kernel.org/r/20200410073312.38855-1-yanaijie@huawei.com

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/tracing_map.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 9e31bfc818ff..74738c9856f1 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -283,7 +283,7 @@ int tracing_map_add_key_field(struct tracing_map *map,
 	return idx;
 }
 
-void tracing_map_array_clear(struct tracing_map_array *a)
+static void tracing_map_array_clear(struct tracing_map_array *a)
 {
 	unsigned int i;
 
@@ -294,7 +294,7 @@ void tracing_map_array_clear(struct tracing_map_array *a)
 		memset(a->pages[i], 0, PAGE_SIZE);
 }
 
-void tracing_map_array_free(struct tracing_map_array *a)
+static void tracing_map_array_free(struct tracing_map_array *a)
 {
 	unsigned int i;
 
@@ -316,7 +316,7 @@ void tracing_map_array_free(struct tracing_map_array *a)
 	kfree(a);
 }
 
-struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
+static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
 						  unsigned int entry_size)
 {
 	struct tracing_map_array *a;
-- 
cgit 


From 6ade99ec6175ab2b54c227521e181e1c3c2bfc8a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 24 Apr 2020 15:41:20 -0500
Subject: proc: Put thread_pid in release_task not proc_flush_pid

Oleg pointed out that in the unlikely event the kernel is compiled
with CONFIG_PROC_FS unset that release_task will now leak the pid.

Move the put_pid out of proc_flush_pid into release_task to fix this
and to guarantee I don't make that mistake again.

When possible it makes sense to keep get and put in the same function
so it can easily been seen how they pair up.

Fixes: 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/exit.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 389a88cb3081..ce2a75bc0ade 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -219,6 +219,7 @@ repeat:
 
 	write_unlock_irq(&tasklist_lock);
 	proc_flush_pid(thread_pid);
+	put_pid(thread_pid);
 	release_thread(p);
 	put_task_struct_rcu_user(p);
 
-- 
cgit 


From 4adb7a4a151c65ac7e9c3a1aa462c84190d48385 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 23 Apr 2020 22:20:44 -0700
Subject: bpf: Fix leak in LINK_UPDATE and enforce empty old_prog_fd

Fix bug of not putting bpf_link in LINK_UPDATE command.
Also enforce zeroed old_prog_fd if no BPF_F_REPLACE flag is specified.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200424052045.4002963-1-andriin@fb.com
---
 kernel/bpf/syscall.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d85f37239540..bca58c235ac0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3628,8 +3628,10 @@ static int link_update(union bpf_attr *attr)
 		return PTR_ERR(link);
 
 	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
-	if (IS_ERR(new_prog))
-		return PTR_ERR(new_prog);
+	if (IS_ERR(new_prog)) {
+		ret = PTR_ERR(new_prog);
+		goto out_put_link;
+	}
 
 	if (flags & BPF_F_REPLACE) {
 		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
@@ -3638,6 +3640,9 @@ static int link_update(union bpf_attr *attr)
 			old_prog = NULL;
 			goto out_put_progs;
 		}
+	} else if (attr->link_update.old_prog_fd) {
+		ret = -EINVAL;
+		goto out_put_progs;
 	}
 
 #ifdef CONFIG_CGROUP_BPF
@@ -3653,6 +3658,8 @@ out_put_progs:
 		bpf_prog_put(old_prog);
 	if (ret)
 		bpf_prog_put(new_prog);
+out_put_link:
+	bpf_link_put(link);
 	return ret;
 }
 
-- 
cgit 


From 03f87c0b45b177ba5f6b4a9bbe9f95e4aba31026 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Fri, 24 Apr 2020 15:34:27 +0200
Subject: bpf: Propagate expected_attach_type when verifying freplace programs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For some program types, the verifier relies on the expected_attach_type of
the program being verified in the verification process. However, for
freplace programs, the attach type was not propagated along with the
verifier ops, so the expected_attach_type would always be zero for freplace
programs.

This in turn caused the verifier to sometimes make the wrong call for
freplace programs. For all existing uses of expected_attach_type for this
purpose, the result of this was only false negatives (i.e., freplace
functions would be rejected by the verifier even though they were valid
programs for the target they were replacing). However, should a false
positive be introduced, this can lead to out-of-bounds accesses and/or
crashes.

The fix introduced in this patch is to propagate the expected_attach_type
to the freplace program during verification, and reset it after that is
done.

Fixes: be8704ff07d2 ("bpf: Introduce dynamic program extensions")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/158773526726.293902.13257293296560360508.stgit@toke.dk
---
 kernel/bpf/verifier.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9382609147f5..fa1d8245b925 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10497,6 +10497,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 				return -EINVAL;
 			}
 			env->ops = bpf_verifier_ops[tgt_prog->type];
+			prog->expected_attach_type = tgt_prog->expected_attach_type;
 		}
 		if (!tgt_prog->jited) {
 			verbose(env, "Can attach to only JITed progs\n");
@@ -10841,6 +10842,13 @@ err_release_maps:
 		 * them now. Otherwise free_used_maps() will release them.
 		 */
 		release_maps(env);
+
+	/* extension progs temporarily inherit the attach_type of their targets
+	   for verification purposes, so set it back to zero before returning
+	 */
+	if (env->prog->type == BPF_PROG_TYPE_EXT)
+		env->prog->expected_attach_type = 0;
+
 	*prog = env->prog;
 err_unlock:
 	if (!is_priv)
-- 
cgit 


From 6f302bfb221470e712ce3e5911fb83cdca174387 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Thu, 23 Apr 2020 10:32:40 +0800
Subject: bpf: Make bpf_link_fops static

Fix the following sparse warning:

kernel/bpf/syscall.c:2289:30: warning: symbol 'bpf_link_fops' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/1587609160-117806-1-git-send-email-zou_wei@huawei.com
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bca58c235ac0..7626b8024471 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2283,7 +2283,7 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 }
 #endif
 
-const struct file_operations bpf_link_fops = {
+static const struct file_operations bpf_link_fops = {
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo	= bpf_link_show_fdinfo,
 #endif
-- 
cgit