Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller: 1) In order to avoid problems in the future, make cgroup bpf overriding explicit using BPF_F_ALLOW_OVERRIDE. From Alexei Starovoitov. 2) LLC sets skb->sk without proper skb->destructor and this explodes, fix from Eric Dumazet. 3) Make sure when we have an ipv4 mapped source address, the destination is either also an ipv4 mapped address or ipv6_addr_any(). Fix from Jonathan T. Leighton. 4) Avoid packet loss in fec driver by programming the multicast filter more intelligently. From Rui Sousa. 5) Handle multiple threads invoking fanout_add(), fix from Eric Dumazet. 6) Since we can invoke the TCP input path in process context, without BH being disabled, we have to accommodate that in the locking of the TCP probe. Also from Eric Dumazet. 7) Fix erroneous emission of NETEVENT_DELAY_PROBE_TIME_UPDATE when we aren't even updating that sysctl value. From Marcus Huewe. 8) Fix endian bugs in ibmvnic driver, from Thomas Falcon. [ This is the second version of the pull that reverts the nested rhashtable changes that looked a bit too scary for this late in the release - Linus ] * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (27 commits) rhashtable: Revert nested table changes. ibmvnic: Fix endian errors in error reporting output ibmvnic: Fix endian error when requesting device capabilities net: neigh: Fix netevent NETEVENT_DELAY_PROBE_TIME_UPDATE notification net: xilinx_emaclite: fix freezes due to unordered I/O net: xilinx_emaclite: fix receive buffer overflow bpf: kernel header files need to be copied into the tools directory tcp: tcp_probe: use spin_lock_bh() uapi: fix linux/if_pppol2tp.h userspace compilation errors packet: fix races in fanout_add() ibmvnic: Fix initial MTU settings net: ethernet: ti: cpsw: fix cpsw assignment in resume kcm: fix a null pointer dereference in kcm_sendmsg() net: fec: fix multicast filtering hardware setup ipv6: Handle IPv4-mapped src to in6addr_any dst. 
ipv6: Inhibit IPv4-mapped src address on the wire. net/mlx5e: Disable preemption when doing TC statistics upcall rhashtable: Add nested tables tipc: Fix tipc_sk_reinit race conditions gfs2: Use rhashtable walk interface in glock_hash_walk ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-02-16 08:37:18 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-02-16 08:37:18 -0800
commit: 3c7a9f32f9392c9dfce24f33bdc6799852903e27 (patch)
tree: 15b5365c2f82d2bd041e202fdec4d2d3342e8559 /kernel
parent: 747ae0a96f1a78b35c5a3d93ad37a16655e16340 (diff)
parent: bf3f14d6342cfb37eab8f0cddd0e4d4063fd9fc9 (diff)
3 files changed, 66 insertions, 22 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index a515f7b007c6..da0f53690295 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
 		e = rcu_dereference_protected(parent->bpf.effective[type],
 					      lockdep_is_held(&cgroup_mutex));
 		rcu_assign_pointer(cgrp->bpf.effective[type], e);
+		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
 	}
 }
 
@@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
  *
  * Must be called with cgroup_mutex held.
  */
-void __cgroup_bpf_update(struct cgroup *cgrp,
-			 struct cgroup *parent,
-			 struct bpf_prog *prog,
-			 enum bpf_attach_type type)
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+			struct bpf_prog *prog, enum bpf_attach_type type,
+			bool new_overridable)
 {
-	struct bpf_prog *old_prog, *effective;
+	struct bpf_prog *old_prog, *effective = NULL;
 	struct cgroup_subsys_state *pos;
+	bool overridable = true;
 
-	old_prog = xchg(cgrp->bpf.prog + type, prog);
+	if (parent) {
+		overridable = !parent->bpf.disallow_override[type];
+		effective = rcu_dereference_protected(parent->bpf.effective[type],
+						      lockdep_is_held(&cgroup_mutex));
+	}
+
+	if (prog && effective && !overridable)
+		/* if parent has non-overridable prog attached, disallow
+		 * attaching new programs to descendent cgroup
+		 */
+		return -EPERM;
+
+	if (prog && effective && overridable != new_overridable)
+		/* if parent has overridable prog attached, only
+		 * allow overridable programs in descendent cgroup
+		 */
+		return -EPERM;
 
-	effective = (!prog && parent) ?
-		rcu_dereference_protected(parent->bpf.effective[type],
-					  lockdep_is_held(&cgroup_mutex)) :
-		prog;
+	old_prog = cgrp->bpf.prog[type];
+
+	if (prog) {
+		overridable = new_overridable;
+		effective = prog;
+		if (old_prog &&
+		    cgrp->bpf.disallow_override[type] == new_overridable)
+			/* disallow attaching non-overridable on top
+			 * of existing overridable in this cgroup
+			 * and vice versa
+			 */
+			return -EPERM;
+	}
+
+	if (!prog && !old_prog)
+		/* report error when trying to detach and nothing is attached */
+		return -ENOENT;
+
+	cgrp->bpf.prog[type] = prog;
 
 	css_for_each_descendant_pre(pos, &cgrp->self) {
 		struct cgroup *desc = container_of(pos, struct cgroup, self);
 
 		/* skip the subtree if the descendant has its own program */
-		if (desc->bpf.prog[type] && desc != cgrp)
+		if (desc->bpf.prog[type] && desc != cgrp) {
 			pos = css_rightmost_descendant(pos);
-		else
+		} else {
 			rcu_assign_pointer(desc->bpf.effective[type],
 					   effective);
+			desc->bpf.disallow_override[type] = !overridable;
+		}
 	}
 
 	if (prog)
@@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
+	return 0;
 }
 
 /**
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 19b6129eab23..bbb016adbaeb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -920,13 +920,14 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #ifdef CONFIG_CGROUP_BPF
 
-#define BPF_PROG_ATTACH_LAST_FIELD attach_type
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
 static int bpf_prog_attach(const union bpf_attr *attr)
 {
+	enum bpf_prog_type ptype;
 	struct bpf_prog *prog;
 	struct cgroup *cgrp;
-	enum bpf_prog_type ptype;
+	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -934,6 +935,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_PROG_ATTACH))
 		return -EINVAL;
 
+	if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+		return -EINVAL;
+
 	switch (attr->attach_type) {
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
@@ -956,10 +960,13 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return PTR_ERR(cgrp);
 	}
 
-	cgroup_bpf_update(cgrp, prog, attr->attach_type);
+	ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+				attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+	if (ret)
+		bpf_prog_put(prog);
 	cgroup_put(cgrp);
 
-	return 0;
+	return ret;
 }
 
 #define BPF_PROG_DETACH_LAST_FIELD attach_type
@@ -967,6 +974,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
 	struct cgroup *cgrp;
+	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -982,7 +990,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		if (IS_ERR(cgrp))
 			return PTR_ERR(cgrp);
 
-		cgroup_bpf_update(cgrp, NULL, attr->attach_type);
+		ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
 		cgroup_put(cgrp);
 		break;
 
@@ -990,7 +998,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 688dd02af985..53bbca7c4859 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void)
 subsys_initcall(cgroup_namespaces_init);
 
 #ifdef CONFIG_CGROUP_BPF
-void cgroup_bpf_update(struct cgroup *cgrp,
-		       struct bpf_prog *prog,
-		       enum bpf_attach_type type)
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+		      enum bpf_attach_type type, bool overridable)
 {
 	struct cgroup *parent = cgroup_parent(cgrp);
+	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	__cgroup_bpf_update(cgrp, parent, prog, type);
+	ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
 	mutex_unlock(&cgroup_mutex);
+	return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
author	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-16 08:37:18 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-16 08:37:18 -0800
commit	3c7a9f32f9392c9dfce24f33bdc6799852903e27 (patch)
tree	15b5365c2f82d2bd041e202fdec4d2d3342e8559 /kernel
parent	747ae0a96f1a78b35c5a3d93ad37a16655e16340 (diff)
parent	bf3f14d6342cfb37eab8f0cddd0e4d4063fd9fc9 (diff)