From 4f47e8ab6ab796b5380f74866fa5287aca4dcc58 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 22 Jun 2020 16:40:29 +0800 Subject: xfrm: policy: match with both mark and mask on user interfaces In commit ed17b8d377ea ("xfrm: fix a warning in xfrm_policy_insert_list"), it would take 'priority' to make a policy unique, and allow duplicated policies with different 'priority' to be added, which is not expected by userland, as Tobias reported in strongswan. To fix this duplicated policies issue, and also fix the issue in commit ed17b8d377ea ("xfrm: fix a warning in xfrm_policy_insert_list"), when doing add/del/get/update on user interfaces, this patch is to change to look up a policy with both mark and mask by doing: mark.v == pol->mark.v && mark.m == pol->mark.m and leave the check: (mark & pol->mark.m) == pol->mark.v for tx/rx path only. As the userland expects an exact mark and mask match to manage policies. v1->v2: - make xfrm_policy_mark_match inline and fix the changelog as Tobias suggested. Fixes: 295fae568885 ("xfrm: Allow user space manipulation of SPD mark") Fixes: ed17b8d377ea ("xfrm: fix a warning in xfrm_policy_insert_list") Reported-by: Tobias Brunner Tested-by: Tobias Brunner Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 11 +++++++---- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 39 ++++++++++++++++----------------------- net/xfrm/xfrm_user.c | 18 +++++++++++------- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c7d213c9f9d8..5c20953c8deb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1630,13 +1630,16 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, void *); void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, + const struct xfrm_mark *mark, + u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8, - int dir, u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, + const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, u32 id, int delete, + int *err); int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); void xfrm_policy_hash_rebuild(struct net *net); u32 xfrm_get_acqseq(void); diff --git a/net/key/af_key.c b/net/key/af_key.c index b67ed3a8486c..979c579afc63 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2400,7 +2400,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2651,7 +2651,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 564aa6492e7c..6847b3579f54 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1433,14 +1433,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_unlock_bh(&pq->hold_queue.lock); } -static bool xfrm_policy_mark_match(struct xfrm_policy *policy, - struct xfrm_policy *pol) +static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark, + struct xfrm_policy *pol) { - if (policy->mark.v == pol->mark.v && - policy->priority == pol->priority) - return true; - - return false; + return mark->v == pol->mark.v && mark->m == pol->mark.m; } static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed) @@ -1503,7 +1499,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(policy, pol) && + xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { delpol = pol; @@ -1538,7 +1534,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(policy, pol) && + xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) @@ -1610,9 +1606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) EXPORT_SYMBOL(xfrm_policy_insert); static struct xfrm_policy * -__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, - u8 type, int dir, - struct xfrm_selector *sel, +__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark, + u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx) { struct xfrm_policy *pol; @@ -1623,7 +1618,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && pol->if_id == if_id && - (mark & pol->mark.m) == pol->mark.v && + xfrm_policy_mark_match(mark, pol) && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) return pol; @@ -1632,11 +1627,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, return NULL; } -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, - struct xfrm_selector *sel, - struct xfrm_sec_ctx *ctx, int delete, - int *err) +struct xfrm_policy * +xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete, int *err) { struct xfrm_pol_inexact_bin *bin = NULL; struct xfrm_policy *pol, *ret = NULL; @@ -1703,9 +1697,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy * +xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -1720,8 +1714,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, ret = NULL; hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && - pol->if_id == if_id && - (mark & pol->mark.m) == pol->mark.v) { + pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e6cfaa680ef3..fbb7d9d06478 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1863,7 +1863,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int delete; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m); u32 if_id = 0; p = nlmsg_data(nlh); @@ -1880,8 +1879,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + xfrm_mark_get(attrs, &m); + if (p->index) - xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, + p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1898,8 +1900,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel, - ctx, delete, &err); + xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, + &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -2166,7 +2168,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m); u32 if_id = 0; err = copy_from_user_policy_type(&type, attrs); @@ -2180,8 +2181,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + xfrm_mark_get(attrs, &m); + if (p->index) - xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index, + 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -2198,7 +2202,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, + xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } -- cgit From 17175d1a27c618e214555b91eca8a0be4cf07f45 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 3 Jul 2020 16:57:09 +0200 Subject: xfrm: esp6: fix encapsulation header offset computation In commit 0146dca70b87, I incorrectly adapted the code that computes the location of the UDP or TCP encapsulation header from IPv4 to IPv6. In esp6_input_done2, skb->transport_header points to the ESP header, so by adding skb_network_header_len, uh and th will point to the ESP header, not the encapsulation header that's in front of it. Since the TCP header's size can change with options, we have to start from the IPv6 header and walk past possible extensions. Fixes: 0146dca70b87 ("xfrm: add support for UDPv6 encapsulation of ESP") Fixes: 26333c37fc28 ("xfrm: add IPv6 support for espintcp") Reported-by: Tobias Brunner Tested-by: Tobias Brunner Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c43592771126..55ae70be91b3 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -805,10 +805,16 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (x->encap) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int offset = skb_network_offset(skb) + sizeof(*ip6h); struct xfrm_encap_tmpl *encap = x->encap; - struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); - struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); - __be16 source; + u8 nexthdr = ip6h->nexthdr; + __be16 frag_off, source; + struct udphdr *uh; + struct tcphdr *th; + + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + uh = (void *)(skb->data + offset); + th = (void *)(skb->data + offset); switch (x->encap->encap_type) { case TCP_ENCAP_ESPINTCP: -- cgit From 55b244221c3f17eb2ed51c8e39e4a01c523e4eee Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 10 Jul 2020 17:04:40 +0200 Subject: selftests/bpf: Fix cgroup sockopt verifier test Since the BPF_PROG_TYPE_CGROUP_SOCKOPT verifier test does not set an attach type, bpf_prog_load_check_attach() disallows loading the program and the test is always skipped: #434/p perfevent for cgroup sockopt SKIP (unsupported program type 25) Fix the issue by setting a valid attach type. Fixes: 0456ea170cd6 ("bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT}") Signed-off-by: Jean-Philippe Brucker Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20200710150439.126627-1-jean-philippe@linaro.org --- tools/testing/selftests/bpf/verifier/event_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c index 99f8f582c02b..c5e805980409 100644 --- a/tools/testing/selftests/bpf/verifier/event_output.c +++ b/tools/testing/selftests/bpf/verifier/event_output.c @@ -112,6 +112,7 @@ "perfevent for cgroup sockopt", .insns = { __PERF_EVENT_INSNS__ }, .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, .fixup_map_event_output = { 4 }, .result = ACCEPT, .retval = 1, -- cgit From 5b801dfb7feb2738975d80223efc2fc193e55573 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 14 Jul 2020 14:09:04 -0400 Subject: bpf: Fix NULL pointer dereference in __btf_resolve_helper_id() Prevent __btf_resolve_helper_id() from dereferencing `btf_vmlinux` as NULL. This patch fixes the following syzbot bug: https://syzkaller.appspot.com/bug?id=f823224ada908fa5c207902a5a62065e53ca0fcc Reported-by: syzbot+ee09bda7017345f1fbe6@syzkaller.appspotmail.com Signed-off-by: Peilin Ye Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200714180904.277512-1-yepeilin.cs@gmail.com --- kernel/bpf/btf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 9a1a98dd9e97..0443600146dc 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4058,6 +4058,11 @@ static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, const char *tname, *sym; u32 btf_id, i; + if (!btf_vmlinux) { + bpf_log(log, "btf_vmlinux doesn't exist\n"); + return -EINVAL; + } + if (IS_ERR(btf_vmlinux)) { bpf_log(log, "btf_vmlinux is malformed\n"); return -EINVAL; -- cgit From ac1321efb14284f5572dcba57aa9da362faba751 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 16 Jul 2020 10:09:01 +0200 Subject: espintcp: support non-blocking sends Currently, non-blocking sends from userspace result in EOPNOTSUPP. To support this, we need to tell espintcp_sendskb_locked() and espintcp_sendskmsg_locked() that non-blocking operation was requested from espintcp_sendmsg(). Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Reported-by: Andrew Cagney Tested-by: Andrew Cagney Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/espintcp.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 100e29682b48..5d3d2b98c62d 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -213,7 +213,7 @@ retry: return 0; } -static int espintcp_push_msgs(struct sock *sk) +static int espintcp_push_msgs(struct sock *sk, int flags) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct espintcp_msg *emsg = &ctx->partial; @@ -227,12 +227,12 @@ static int espintcp_push_msgs(struct sock *sk) ctx->tx_running = 1; if (emsg->skb) - err = espintcp_sendskb_locked(sk, emsg, 0); + err = espintcp_sendskb_locked(sk, emsg, flags); else - err = espintcp_sendskmsg_locked(sk, emsg, 0); + err = espintcp_sendskmsg_locked(sk, emsg, flags); if (err == -EAGAIN) { ctx->tx_running = 0; - return 0; + return flags & MSG_DONTWAIT ? -EAGAIN : 0; } if (!err) memset(emsg, 0, sizeof(*emsg)); @@ -257,7 +257,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) offset = skb_transport_offset(skb); len = skb->len - offset; - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); if (emsg->len) { kfree_skb(skb); @@ -270,7 +270,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) emsg->len = len; emsg->skb = skb; - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); return 0; } @@ -287,7 +287,7 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) char buf[2] = {0}; int err, end; - if (msg->msg_flags) + if (msg->msg_flags & ~MSG_DONTWAIT) return -EOPNOTSUPP; if (size > MAX_ESPINTCP_MSG) @@ -298,9 +298,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); - err = espintcp_push_msgs(sk); + err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT); if (err < 0) { - err = -ENOBUFS; + if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT)) + err = -ENOBUFS; goto unlock; } @@ -337,10 +338,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) tcp_rate_check_app_limited(sk); - err = espintcp_push_msgs(sk); + err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT); /* this message could be partially sent, keep it */ - if (err < 0) - goto unlock; + release_sock(sk); return size; @@ -374,7 +374,7 @@ static void espintcp_tx_work(struct work_struct *work) lock_sock(sk); if (!ctx->tx_running) - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); release_sock(sk); } -- cgit From e229c877cde141a4c46cb603a341ce8c909e9a98 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 16 Jul 2020 10:09:02 +0200 Subject: espintcp: recv() should return 0 when the peer socket is closed man 2 recv says: RETURN VALUE When a stream socket peer has performed an orderly shutdown, the return value will be 0 (the traditional "end-of-file" return). Currently, this works for blocking reads, but non-blocking reads will return -EAGAIN. This patch overwrites that return value when the peer won't send us any more data. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Reported-by: Andrew Cagney Tested-by: Andrew Cagney Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/espintcp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 5d3d2b98c62d..cb83e3664680 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -109,8 +109,11 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, flags |= nonblock ? MSG_DONTWAIT : 0; skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err); - if (!skb) + if (!skb) { + if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) + return 0; return err; + } copied = len; if (copied > skb->len) -- cgit From 95a35b42bc6e5d8ce7baff8aefed10e9829e7ae5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 16 Jul 2020 10:09:03 +0200 Subject: xfrm: policy: fix IPv6-only espintcp compilation In case we're compiling espintcp support only for IPv6, we should still initialize the common code. Fixes: 26333c37fc28 ("xfrm: add IPv6 support for espintcp") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6847b3579f54..19c5e0fa3f44 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,7 +39,7 @@ #ifdef CONFIG_XFRM_STATISTICS #include #endif -#ifdef CONFIG_INET_ESPINTCP +#ifdef CONFIG_XFRM_ESPINTCP #include #endif @@ -4149,7 +4149,7 @@ void __init xfrm_init(void) seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); -#ifdef CONFIG_INET_ESPINTCP +#ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif -- cgit From 101dde4207f1daa1fda57d714814a03835dccc3f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 17 Jul 2020 10:34:27 +0200 Subject: xfrm: Fix crash when the hold queue is used. The commits "xfrm: Move dst->path into struct xfrm_dst" and "net: Create and use new helper xfrm_dst_child()." changed xfrm bundle handling under the assumption that xdst->path and dst->child are not a NULL pointer only if dst->xfrm is not a NULL pointer. That is true with one exception. If the xfrm hold queue is used to wait until a SA is installed by the key manager, we create a dummy bundle without a valid dst->xfrm pointer. The current xfrm bundle handling crashes in that case. Fix this by extending the NULL check of dst->xfrm with a test of the DST_XFRM_QUEUE flag. Fixes: 0f6c480f23f4 ("xfrm: Move dst->path into struct xfrm_dst") Fixes: b92cf4aab8e6 ("net: Create and use new helper xfrm_dst_child().") Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5c20953c8deb..51f65d23ebaf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -941,7 +941,7 @@ struct xfrm_dst { static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) { #ifdef CONFIG_XFRM - if (dst->xfrm) { + if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) { const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst; return xdst->path; @@ -953,7 +953,7 @@ static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst) static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst) { #ifdef CONFIG_XFRM - if (dst->xfrm) { + if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) { struct xfrm_dst *xdst = (struct xfrm_dst *) dst; return xdst->child; } -- cgit From 37bd22420f856fcd976989f1d4f1f7ad28e1fcac Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 22 Jul 2020 04:00:53 -0700 Subject: af_key: pfkey_dump needs parameter validation In pfkey_dump() dplen and splen can both be specified to access the xfrm_address_t structure out of bounds in__xfrm_state_filter_match() when it calls addr_match() with the indexes. Return EINVAL if either are out of range. Signed-off-by: Mark Salyzyn Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: kernel-team@android.com Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Cc: Jakub Kicinski Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Steffen Klassert --- net/key/af_key.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 979c579afc63..a915bc86620a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; + if ((xfilter->sadb_x_filter_splen >= + (sizeof(xfrm_address_t) << 3)) || + (xfilter->sadb_x_filter_dplen >= + (sizeof(xfrm_address_t) << 3))) { + mutex_unlock(&pfk->dump_lock); + return -EINVAL; + } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); -- cgit From 367fe04eb64760fab400bdc990cc6067071e1e39 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 27 Jul 2020 12:24:33 +0200 Subject: mptcp: fix joined subflows with unblocking sk Unblocking sockets used for outgoing connections were not containing inet info about the initial connection due to a typo there: the value of "err" variable is negative in the kernelspace. This fixes the creation of additional subflows where the remote port has to be reused if the other host didn't announce another one. This also fixes inet_diag showing blank info about MPTCP sockets from unblocking sockets doing a connect(). Fixes: 41be81a8d3d0 ("mptcp: fix unblocking connect()") Signed-off-by: Matthieu Baerts Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3980fbb6f31e..c0abe738e7d3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1833,7 +1833,7 @@ do_connect: /* on successful connect, the msk state will be moved to established by * subflow_finish_connect() */ - if (!err || err == EINPROGRESS) + if (!err || err == -EINPROGRESS) mptcp_copy_inaddrs(sock->sk, ssock->sk); else inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); -- cgit From 5fd82200d870a5dd3e509c98ef2041f580b2c0e1 Mon Sep 17 00:00:00 2001 From: laurent brando Date: Mon, 27 Jul 2020 18:26:14 +0800 Subject: net: mscc: ocelot: fix hardware timestamp dequeue logic The next hw timestamp should be snapshoot to the read registers only once the current timestamp has been read. If none of the pending skbs matches the current HW timestamp just gracefully flush the available timestamp by reading it. Signed-off-by: laurent brando Signed-off-by: Vladimir Oltean Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9cfe1fd98c30..f17da67a4622 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -748,21 +748,21 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) spin_unlock_irqrestore(&port->tx_skbs.lock, flags); - /* Next ts */ - ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT); + /* Get the h/w timestamp */ + ocelot_get_hwtimestamp(ocelot, &ts); if (unlikely(!skb_match)) continue; - /* Get the h/w timestamp */ - ocelot_get_hwtimestamp(ocelot, &ts); - /* Set the timestamp into the skb */ memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); skb_tstamp_tx(skb_match, &shhwtstamps); dev_kfree_skb_any(skb_match); + + /* Next ts */ + ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT); } } EXPORT_SYMBOL(ocelot_get_txtstamp); -- cgit From 955cbe91bcf782c09afe369c95a20f0a4b6dcc3c Mon Sep 17 00:00:00 2001 From: Tanner Love Date: Mon, 27 Jul 2020 12:25:28 -0400 Subject: selftests/net: rxtimestamp: fix clang issues for target arch PowerPC The signedness of char is implementation-dependent. Some systems (including PowerPC and ARM) use unsigned char. Clang 9 threw: warning: result of comparison of constant -1 with expression of type \ 'char' is always true [-Wtautological-constant-out-of-range-compare] &arg_index)) != -1) { Tested: make -C tools/testing/selftests TARGETS="net" run_tests Fixes: 16e781224198 ("selftests/net: Add a test to validate behavior of rx timestamps") Signed-off-by: Tanner Love Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/rxtimestamp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 422e7761254d..bcb79ba1f214 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -329,8 +329,7 @@ int main(int argc, char **argv) bool all_tests = true; int arg_index = 0; int failures = 0; - int s, t; - char opt; + int s, t, opt; while ((opt = getopt_long(argc, argv, "", long_options, &arg_index)) != -1) { -- cgit From 64f9ede2274980076423583683d44480909b7a40 Mon Sep 17 00:00:00 2001 From: Tanner Love Date: Mon, 27 Jul 2020 12:25:29 -0400 Subject: selftests/net: psock_fanout: fix clang issues for target arch PowerPC Clang 9 threw: warning: format specifies type 'unsigned short' but the argument has \ type 'int' [-Wformat] typeflags, PORT_BASE, PORT_BASE + port_off); Tested: make -C tools/testing/selftests TARGETS="net" run_tests Fixes: 77f65ebdca50 ("packet: packet fanout rollover during socket overload") Signed-off-by: Tanner Love Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/psock_fanout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 8c8c7d79c38d..2c522f7a0aec 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -350,7 +350,8 @@ static int test_datapath(uint16_t typeflags, int port_off, int fds[2], fds_udp[2][2], ret; fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n", - typeflags, PORT_BASE, PORT_BASE + port_off); + typeflags, (uint16_t)PORT_BASE, + (uint16_t)(PORT_BASE + port_off)); fds[0] = sock_fanout_open(typeflags, 0); fds[1] = sock_fanout_open(typeflags, 0); -- cgit From b4da96ffd30bd4a305045ba5c9b0de5d4aa20dc7 Mon Sep 17 00:00:00 2001 From: Tanner Love Date: Mon, 27 Jul 2020 12:25:30 -0400 Subject: selftests/net: so_txtime: fix clang issues for target arch PowerPC On powerpcle, int64_t maps to long long. Clang 9 threw: warning: absolute value function 'labs' given an argument of type \ 'long long' but has parameter of type 'long' which may cause \ truncation of value [-Wabsolute-value] if (labs(tstop - texpect) > cfg_variance_us) Tested: make -C tools/testing/selftests TARGETS="net" run_tests Fixes: af5136f95045 ("selftests/net: SO_TXTIME with ETF and FQ") Signed-off-by: Tanner Love Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/so_txtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index ceaad78e9667..3155fbbf644b 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -121,7 +121,7 @@ static bool do_recv_one(int fdr, struct timed_send *ts) if (rbuf[0] != ts->data) error(1, 0, "payload mismatch. expected %c", ts->data); - if (labs(tstop - texpect) > cfg_variance_us) + if (llabs(tstop - texpect) > cfg_variance_us) error(1, 0, "exceeds variance (%d us)", cfg_variance_us); return false; -- cgit From 94b6c13be57cdedb7cf4d33dbcd066fad133f22b Mon Sep 17 00:00:00 2001 From: Tanner Love Date: Mon, 27 Jul 2020 12:25:31 -0400 Subject: selftests/net: tcp_mmap: fix clang warning for target arch PowerPC When size_t maps to unsigned int (e.g. on 32-bit powerpc), then the comparison with 1<<35 is always true. Clang 9 threw: warning: result of comparison of constant 34359738368 with \ expression of type 'size_t' (aka 'unsigned int') is always true \ [-Wtautological-constant-out-of-range-compare] while (total < FILE_SZ) { Tested: make -C tools/testing/selftests TARGETS="net" run_tests Fixes: 192dc405f308 ("selftests: net: add tcp_mmap program") Signed-off-by: Tanner Love Acked-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 4555f88252ba..a61b7b3da549 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -344,7 +344,7 @@ int main(int argc, char *argv[]) { struct sockaddr_storage listenaddr, addr; unsigned int max_pacing_rate = 0; - size_t total = 0; + uint64_t total = 0; char *host = NULL; int fd, c, on = 1; char *buffer; @@ -473,12 +473,12 @@ int main(int argc, char *argv[]) zflg = 0; } while (total < FILE_SZ) { - ssize_t wr = FILE_SZ - total; + int64_t wr = FILE_SZ - total; if (wr > chunk_size) wr = chunk_size; /* Note : we just want to fill the pipe with 0 bytes */ - wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0); + wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0); if (wr <= 0) break; total += wr; -- cgit From 181964e619b76ae2e71bcdc6001cf977bec4cf6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 27 Jul 2020 19:22:20 +0100 Subject: fix a braino in cmsghdr_from_user_compat_to_kern() commit 547ce4cfb34c ("switch cmsghdr_from_user_compat_to_kern() to copy_from_user()") missed one of the places where ucmlen should've been replaced with cmsg.cmsg_len, now that we are fetching the entire struct rather than doing it field-by-field. As the result, compat sendmsg() with several different-sized cmsg attached started to fail with EINVAL. Trivial to fix, fortunately. Fixes: 547ce4cfb34c ("switch cmsghdr_from_user_compat_to_kern() to copy_from_user()") Reported-by: Nick Bowler Tested-by: Nick Bowler Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/compat.c b/net/compat.c index 5e3041a2c37d..434838bef5f8 100644 --- a/net/compat.c +++ b/net/compat.c @@ -202,7 +202,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, /* Advance. */ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); - ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); + ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len); } /* -- cgit From d5dba1376e2bafec0f4408dc65706c5908964083 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 27 Jul 2020 16:03:47 +0200 Subject: xfrm: esp6: fix the location of the transport header with encapsulation commit 17175d1a27c6 ("xfrm: esp6: fix encapsulation header offset computation") changed esp6_input_done2 to correctly find the size of the IPv6 header that precedes the TCP/UDP encapsulation header, but didn't adjust the final call to skb_set_transport_header, which I assumed was correct in using skb_network_header_len. Xiumei Mu reported that when we create xfrm states that include port numbers in the selector, traffic from the user sockets is dropped. It turns out that we get a state mismatch in __xfrm_policy_check, because we end up trying to compare the encapsulation header's ports with the selector that's based on user traffic ports. Fixes: 0146dca70b87 ("xfrm: add support for UDPv6 encapsulation of ESP") Fixes: 26333c37fc28 ("xfrm: add IPv6 support for espintcp") Reported-by: Xiumei Mu Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 55ae70be91b3..52c2f063529f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -815,6 +815,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); uh = (void *)(skb->data + offset); th = (void *)(skb->data + offset); + hdr_len += offset; switch (x->encap->encap_type) { case TCP_ENCAP_ESPINTCP: -- cgit From cfdaeba5ddc98b303639a3265c2031ac5db249d6 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 28 Jul 2020 10:16:48 +0800 Subject: net: hns3: fix desc filling bug when skb is expanded or lineared The linear and frag data part may be changed when the skb is expanded or lineared in skb_cow_head() or skb_checksum_help(), which is called by hns3_fill_skb_desc(), so the linear len return by skb_headlen() before the calling of hns3_fill_skb_desc() is unreliable. Move hns3_fill_skb_desc() before the calling of skb_headlen() to fix this bug. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yunsheng Lin Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 33c481d11116..3328500c0543 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1093,16 +1093,8 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, int k, sizeoflast; dma_addr_t dma; - if (type == DESC_TYPE_SKB) { - struct sk_buff *skb = (struct sk_buff *)priv; - int ret; - - ret = hns3_fill_skb_desc(ring, skb, desc); - if (unlikely(ret < 0)) - return ret; - - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - } else if (type == DESC_TYPE_FRAGLIST_SKB) { + if (type == DESC_TYPE_FRAGLIST_SKB || + type == DESC_TYPE_SKB) { struct sk_buff *skb = (struct sk_buff *)priv; dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); @@ -1439,6 +1431,10 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) next_to_use_head = ring->next_to_use; + ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use]); + if (unlikely(ret < 0)) + goto fill_err; + ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); if (unlikely(ret < 0)) goto fill_err; -- cgit From a7e90ee5965fafc53d36e8b3205f08c88d7bc11f Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 28 Jul 2020 10:16:49 +0800 Subject: net: hns3: fix a TX timeout issue When the queue depth and queue parameters are modified, there is a low probability that TX timeout occurs. The two operations cause the link to be down or up when the watchdog is still working. All queues are stopped when the link is down. After the carrier is on, all queues are woken up. If the watchdog detects the link between the carrier on and wakeup queues, a false TX timeout occurs. So fix this issue by modifying the sequence of carrier on and queue wakeup, which is symmetrical to the link down action. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 3328500c0543..71ed4c54f6d5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4136,8 +4136,8 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup) return; if (linkup) { - netif_carrier_on(netdev); netif_tx_wake_all_queues(netdev); + netif_carrier_on(netdev); if (netif_msg_link(handle)) netdev_info(netdev, "link up\n"); } else { -- cgit From a6f7bfdc78ddd8d719d108fef973b4e4a5a6ac6b Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 28 Jul 2020 10:16:50 +0800 Subject: net: hns3: add reset check for VF updating port based VLAN Currently hclgevf_update_port_base_vlan_info() may be called when VF is resetting, which may cause hns3_nic_net_open() being called twice unexpectedly. So fix it by adding a reset check for it, and extend critical region for rntl_lock in hclgevf_update_port_base_vlan_info(). Fixes: 92f11ea177cd ("net: hns3: fix set port based VLAN issue for VF") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 30 +++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index a10b022d1951..b8b72ac9935f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3439,23 +3439,35 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, { struct hnae3_handle *nic = &hdev->nic; struct hclge_vf_to_pf_msg send_msg; + int ret; rtnl_lock(); - hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); - rtnl_unlock(); + + if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) { + dev_warn(&hdev->pdev->dev, + "is resetting when updating port based vlan info\n"); + rtnl_unlock(); + return; + } + + ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); + if (ret) { + rtnl_unlock(); + return; + } /* send msg to PF and wait update port based vlan info */ hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, HCLGE_MBX_PORT_BASE_VLAN_CFG); memcpy(send_msg.data, port_base_vlan_info, data_size); - hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); - - if (state == HNAE3_PORT_BASE_VLAN_DISABLE) - nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE; - else - nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE; + ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); + if (!ret) { + if (state == HNAE3_PORT_BASE_VLAN_DISABLE) + nic->port_base_vlan_state = state; + else + nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE; + } - rtnl_lock(); hclgevf_notify_client(hdev, HNAE3_UP_CLIENT); rtnl_unlock(); } -- cgit From efe3fa45f770f1d66e2734ee7a3523c75694ff04 Mon Sep 17 00:00:00 2001 From: Guojia Liao Date: Tue, 28 Jul 2020 10:16:51 +0800 Subject: net: hns3: fix aRFS FD rules leftover after add a user FD rule When user had created a FD rule, all the aRFS rules should be clear up. HNS3 process flow as below: 1.get spin lock of fd_ruls_list 2.clear up all aRFS rules 3.release lock 4.get spin lock of fd_ruls_list 5.creat a rules 6.release lock; There is a short period of time between step 3 and step 4, which would creatting some new aRFS FD rules if driver was receiving packet. So refactor the fd_rule_lock to fix it. Fixes: 441228875706 ("net: hns3: refine the flow director handle") Signed-off-by: Guojia Liao Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 28 ++++++++++++---------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index bb4a6327035d..cee84e7080d6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5806,9 +5806,9 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, /* to avoid rule conflict, when user configure rule by ethtool, * we need to clear all arfs rules */ + spin_lock_bh(&hdev->fd_rule_lock); hclge_clear_arfs_rules(handle); - spin_lock_bh(&hdev->fd_rule_lock); ret = hclge_fd_config_rule(hdev, rule); spin_unlock_bh(&hdev->fd_rule_lock); @@ -5851,6 +5851,7 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle, return ret; } +/* make sure being called after lock up with fd_rule_lock */ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, bool clear_list) { @@ -5863,7 +5864,6 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, if (!hnae3_dev_fd_supported(hdev)) return; - spin_lock_bh(&hdev->fd_rule_lock); for_each_set_bit(location, hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location, @@ -5880,8 +5880,6 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, bitmap_zero(hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); } - - spin_unlock_bh(&hdev->fd_rule_lock); } static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@ -6263,7 +6261,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, u16 flow_id, struct flow_keys *fkeys) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_fd_rule_tuples new_tuples; + struct hclge_fd_rule_tuples new_tuples = {}; struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; u16 tmp_queue_id; @@ -6273,19 +6271,17 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, if (!hnae3_dev_fd_supported(hdev)) return -EOPNOTSUPP; - memset(&new_tuples, 0, sizeof(new_tuples)); - hclge_fd_get_flow_tuples(fkeys, &new_tuples); - - spin_lock_bh(&hdev->fd_rule_lock); - /* when there is already fd rule existed add by user, * arfs should not work */ + spin_lock_bh(&hdev->fd_rule_lock); if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { spin_unlock_bh(&hdev->fd_rule_lock); return -EOPNOTSUPP; } + hclge_fd_get_flow_tuples(fkeys, &new_tuples); + /* check is there flow director filter existed for this flow, * if not, create a new filter for it; * if filter exist with different queue id, modify the filter; @@ -6368,6 +6364,7 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev) #endif } +/* make sure being called after lock up with fd_rule_lock */ static void hclge_clear_arfs_rules(struct hnae3_handle *handle) { #ifdef CONFIG_RFS_ACCEL @@ -6420,10 +6417,14 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) hdev->fd_en = enable; clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; - if (!enable) + + if (!enable) { + spin_lock_bh(&hdev->fd_rule_lock); hclge_del_all_fd_entries(handle, clear); - else + spin_unlock_bh(&hdev->fd_rule_lock); + } else { hclge_restore_fd_entries(handle); + } } static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) @@ -6886,8 +6887,9 @@ static void hclge_ae_stop(struct hnae3_handle *handle) int i; set_bit(HCLGE_STATE_DOWN, &hdev->state); - + spin_lock_bh(&hdev->fd_rule_lock); hclge_clear_arfs_rules(handle); + spin_unlock_bh(&hdev->fd_rule_lock); /* If it is not PF reset, the firmware will disable the MAC, * so it only need to stop phy here. -- cgit From b7b5d25bdd7bdea7d72a41e0a97b1b8f3dea2ee7 Mon Sep 17 00:00:00 2001 From: Guojia Liao Date: Tue, 28 Jul 2020 10:16:52 +0800 Subject: net: hns3: fix for VLAN config when reset failed When device is resetting or reset failed, firmware is unable to handle mailbox. VLAN should not be configured in this case. Fixes: fe4144d47eef ("net: hns3: sync VLAN filter entries when kill VLAN ID failed") Signed-off-by: Guojia Liao Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 ++++--- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 10 ++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index cee84e7080d6..36575e72a915 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9042,11 +9042,12 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, bool writen_to_tbl = false; int ret = 0; - /* When device is resetting, firmware is unable to handle - * mailbox. Just record the vlan id, and remove it after + /* When device is resetting or reset failed, firmware is unable to + * handle mailbox. Just record the vlan id, and remove it after * reset finished. */ - if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && is_kill) { + if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, vport->vlan_del_fail_bmap); return -EBUSY; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index b8b72ac9935f..9162856de1b1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1592,11 +1592,12 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, if (proto != htons(ETH_P_8021Q)) return -EPROTONOSUPPORT; - /* When device is resetting, firmware is unable to handle - * mailbox. Just record the vlan id, and remove it after + /* When device is resetting or reset failed, firmware is unable to + * handle mailbox. Just record the vlan id, and remove it after * reset finished. */ - if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) && is_kill) { + if ((test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) || + test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, hdev->vlan_del_fail_bmap); return -EBUSY; } @@ -3443,7 +3444,8 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, rtnl_lock(); - if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) { + if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) || + test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) { dev_warn(&hdev->pdev->dev, "is resetting when updating port based vlan info\n"); rtnl_unlock(); -- cgit From 2b8e9c7c3fd0e31091edb1c66cc06ffe4988ca21 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 27 Jun 2020 13:29:28 +0300 Subject: net/mlx5: E-switch, Destroy TSAR when fail to enable the mode When either esw_legacy_enable() or esw_offloads_enable() fails, code missed to destroy the created TSAR. Hence, add the missing call to destroy the TSAR. Fixes: 610090ebce92 ("net/mlx5: E-switch, Initialize TSAR Qos hardware block before its user vports") Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1116ab9bea6c..9701f0f8be50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1608,7 +1608,7 @@ abort: mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } - + esw_destroy_tsar(esw); return err; } -- cgit From 0c2600c619578f759cf3d5192b01bd14e281f24c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 27 Jun 2020 13:11:56 +0300 Subject: net/mlx5: E-switch, Destroy TSAR after reload interface When eswitch offloads is enabled, TSAR is created before reloading the interfaces. However when eswitch offloads mode is disabled, TSAR is disabled before reloading the interfaces. To keep the eswitch enable/disable sequence as mirror, destroy TSAR after reloading the interfaces. Fixes: 1bd27b11c1df ("net/mlx5: Introduce E-switch QoS management") Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9701f0f8be50..d9376627584e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1653,8 +1653,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) else if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_disable(esw); - esw_destroy_tsar(esw); - old_mode = esw->mode; esw->mode = MLX5_ESWITCH_NONE; @@ -1664,6 +1662,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } + esw_destroy_tsar(esw); + if (clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); } -- cgit From 59f8f7c84c506cd4fafbfa47acd23ebdb6256dac Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 25 Jun 2020 10:56:07 +0300 Subject: net/mlx5: Fix forward to next namespace The steering tree is as follow (nic RX as example): --------- |root_ns| --------- | -------------------------------- | | | ---------- ---------- --------- |p(prio)0| | p1 | | pn | ---------- ---------- --------- | | ---------------- --------------- |ns(e.g bypass)| |ns(e.g. lag) | ---------------- --------------- | | | ---- ---- ---- |p0| |p1| |pn| ---- ---- ---- | ---- |FT| ---- find_next_chained_ft(prio) returns the first flow table in the next priority. If prio is a parent of a flow table then it returns the first flow table in the next priority in the same namespace, else if prio is parent of namespace, then it should return the first flow table in the next namespace. Currently if the user requests to forward to next namespace, the code calls to find_next_chained_ft with the prio of the next namespace and not the prio of the namesapce itself. Fixes: 9254f8ed15b6 ("net/mlx5: Add support in forward to namespace") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 28 ++++------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 13e2fb79c21a..2569bb6228b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -797,7 +797,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, return ft; } -/* If reverse if false then return the first flow table in next priority of +/* If reverse is false then return the first flow table in next priority of * prio in the tree, else return the last flow table in the previous priority * of prio in the tree. */ @@ -829,34 +829,16 @@ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) return find_closest_ft(prio, true); } -static struct fs_prio *find_fwd_ns_prio(struct mlx5_flow_root_namespace *root, - struct mlx5_flow_namespace *ns) -{ - struct mlx5_flow_namespace *root_ns = &root->ns; - struct fs_prio *iter_prio; - struct fs_prio *prio; - - fs_get_obj(prio, ns->node.parent); - list_for_each_entry(iter_prio, &root_ns->node.children, node.list) { - if (iter_prio == prio && - !list_is_last(&prio->node.children, &iter_prio->node.list)) - return list_next_entry(iter_prio, node.list); - } - return NULL; -} - static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, struct mlx5_flow_act *flow_act) { - struct mlx5_flow_root_namespace *root = find_root(&ft->node); struct fs_prio *prio; + bool next_ns; - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) - prio = find_fwd_ns_prio(root, ft->ns); - else - fs_get_obj(prio, ft->node.parent); + next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; + fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent); - return (prio) ? find_next_chained_ft(prio) : NULL; + return find_next_chained_ft(prio); } static int connect_fts_in_prio(struct mlx5_core_dev *dev, -- cgit From 5cd39b6e9a420329a9a408894be7ba8aa7dd755e Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 1 Jul 2020 12:21:53 +0300 Subject: net/mlx5e: Fix error path of device attach On failure to attach the netdev, fix the rollback by re-setting the device's state back to MLX5E_STATE_DESTROYING. Failing to attach doesn't stop statistics polling via .ndo_get_stats64. In this case, although the device is not attached, it falsely continues to query the firmware for counters. Setting the device's state back to MLX5E_STATE_DESTROYING prevents the firmware counters query. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 081f15074cac..31f9ecae98df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5390,6 +5390,8 @@ err_cleanup_tx: profile->cleanup_tx(priv); out: + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + cancel_work_sync(&priv->update_stats_work); return err; } -- cgit From 0e2e7aa57b8060ee357839dc4f1731a724ba387b Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 8 Jul 2020 13:01:36 +0300 Subject: net/mlx5e: Fix missing cleanup of ethtool steering during rep rx cleanup The cited commit add initialization of ethtool steering during representor rx initializations without cleaning it up in representor rx cleanup, this may cause for stale ethtool flows to remain after moving back from switchdev mode to legacy mode. Fixed by calling ethtool steering cleanup during rep rx cleanup. Fixes: 6783e8b29f63 ("net/mlx5e: Init ethtool steering for representors") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 006807e04eda..8c294ab43f90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -936,6 +936,7 @@ err_close_drop_rq: static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { + mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); -- cgit From 88c8cf92db48b2e359fe3051ad8e09829c1bee5d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 8 Jul 2020 18:53:19 +0300 Subject: net/mlx5: Fix a bug of using ptp channel index as pin index On PTP mlx5_ptp_enable(on=0) flow, driver mistakenly used channel index as pin index. After ptp patch marked in fixes tag was introduced, driver can freely call ptp_find_pin() as part of the .enable() callback. Fix driver mlx5_ptp_enable(on=0) flow to always use ptp_find_pin(). With that, Driver will use the correct pin index in mlx5_ptp_enable(on=0) flow. In addition, when initializing the pins, always set channel to zero. As all pins can be attached to all channels, let ptp_set_pinfunc() to move them between the channels. For stable branches, this fix to be applied only on kernels that includes both patches in fixes tag. Otherwise, mlx5_ptp_enable(on=0) will be stuck on pincfg_mux. Fixes: 62582a7ee783 ("ptp: Avoid deadlocks in the programmable pin code.") Fixes: ee7f12205abc ("net/mlx5e: Implement 1PPS support") Signed-off-by: Eran Ben Elisha Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index ef0706d15a5b..c6967e1a560b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -273,17 +273,17 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, if (rq->extts.index >= clock->ptp_info.n_pins) return -EINVAL; + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + if (on) { - pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); - if (pin < 0) - return -EBUSY; pin_mode = MLX5_PIN_MODE_IN; pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); field_select = MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | MLX5_MTPPS_FS_ENABLE; } else { - pin = rq->extts.index; field_select = MLX5_MTPPS_FS_ENABLE; } @@ -331,12 +331,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; - if (on) { - pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, - rq->perout.index); - if (pin < 0) - return -EBUSY; + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; ts.tv_sec = rq->perout.period.sec; @@ -362,7 +362,6 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, MLX5_MTPPS_FS_ENABLE | MLX5_MTPPS_FS_TIME_STAMP; } else { - pin = rq->perout.index; field_select = MLX5_MTPPS_FS_ENABLE; } @@ -452,7 +451,7 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock) "mlx5_pps%d", i); clock->ptp_info.pin_config[i].index = i; clock->ptp_info.pin_config[i].func = PTP_PF_NONE; - clock->ptp_info.pin_config[i].chan = i; + clock->ptp_info.pin_config[i].chan = 0; } return 0; -- cgit From 071995c877a8646209d55ff8edddd2b054e7424c Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 8 Jul 2020 11:10:01 +0300 Subject: net/mlx5: Verify Hardware supports requested ptp function on a given pin Fix a bug where driver did not verify Hardware pin capabilities for PTP functions. Fixes: ee7f12205abc ("net/mlx5e: Implement 1PPS support") Signed-off-by: Eran Ben Elisha Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index c6967e1a560b..284806e331bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -408,10 +408,31 @@ static int mlx5_ptp_enable(struct ptp_clock_info *ptp, return 0; } +enum { + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0), + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1), +}; + static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan) { - return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + switch (func) { + case PTP_PF_NONE: + return 0; + case PTP_PF_EXTTS: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN); + case PTP_PF_PEROUT: + return !(clock->pps_info.pin_caps[pin] & + MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT); + default: + return -EOPNOTSUPP; + } + + return -EOPNOTSUPP; } static const struct ptp_clock_info mlx5_ptp_clock_info = { -- cgit From 21083309cab1a66877b410cd61340b5dd7cf4875 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 9 Jul 2020 15:51:53 +0300 Subject: net/mlx5e: Fix slab-out-of-bounds in mlx5e_rep_is_lag_netdev mlx5e_rep_is_lag_netdev is used as first check as part of netdev events handler for bond device of non-uplink representors, this handler can get any netdevice under the same network namespace of mlx5e netdevice. Current code treats the netdev as mlx5e netdev and only later on verifies this, hence causes the following Kasan trace: [15402.744990] ================================================================== [15402.746942] BUG: KASAN: slab-out-of-bounds in mlx5e_rep_is_lag_netdev+0xcb/0xf0 [mlx5_core] [15402.749009] Read of size 8 at addr ffff880391f3f6b0 by task ovs-vswitchd/5347 [15402.752065] CPU: 7 PID: 5347 Comm: ovs-vswitchd Kdump: loaded Tainted: G B O --------- -t - 4.18.0-g3dcc204d291d-dirty #1 [15402.755349] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [15402.757600] Call Trace: [15402.758968] dump_stack+0x71/0xab [15402.760427] print_address_description+0x6a/0x270 [15402.761969] kasan_report+0x179/0x2d0 [15402.763445] ? mlx5e_rep_is_lag_netdev+0xcb/0xf0 [mlx5_core] [15402.765121] mlx5e_rep_is_lag_netdev+0xcb/0xf0 [mlx5_core] [15402.766782] mlx5e_rep_esw_bond_netevent+0x129/0x620 [mlx5_core] Fix by deferring the violating access to be post the netdev verify check. Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule") Signed-off-by: Raed Salem Reviewed-by: Roi Dayan Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index bdb71332cbf2..3e44e4d820c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -183,13 +183,16 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *priv; /* A given netdev is not a representor or not a slave of LAG configuration */ if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev)) return false; + priv = netdev_priv(netdev); + rpriv = priv->ppriv; + /* Egress acl forward to vport is supported only non-uplink representor */ return rpriv->rep->vport != MLX5_VPORT_UPLINK; } -- cgit From ed56d749c366be269d58b29597392e4a0ae71c0a Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 20 Jul 2020 18:34:37 +0300 Subject: net/mlx5: Query PPS pin operational status before registering it In a special configuration, a ConnectX6-Dx pin pps-out might be activated when driver is loaded. Fix the driver to always read the operational pin mode when registering it, and advertise it accordingly. Fixes: ee7f12205abc ("net/mlx5e: Implement 1PPS support") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 284806e331bd..2d55b7c22c03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -452,6 +452,38 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .verify = NULL, }; +static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, + u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {}; + + MLX5_SET(mtpps_reg, in, pin, pin); + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +{ + struct mlx5_core_dev *mdev = clock->mdev; + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; + u8 mode; + int err; + + err = mlx5_query_mtpps_pin_mode(mdev, pin, out, sizeof(out)); + if (err || !MLX5_GET(mtpps_reg, out, enable)) + return PTP_PF_NONE; + + mode = MLX5_GET(mtpps_reg, out, pin_mode); + + if (mode == MLX5_PIN_MODE_IN) + return PTP_PF_EXTTS; + else if (mode == MLX5_PIN_MODE_OUT) + return PTP_PF_PEROUT; + + return PTP_PF_NONE; +} + static int mlx5_init_pin_config(struct mlx5_clock *clock) { int i; @@ -471,7 +503,7 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock) sizeof(clock->ptp_info.pin_config[i].name), "mlx5_pps%d", i); clock->ptp_info.pin_config[i].index = i; - clock->ptp_info.pin_config[i].func = PTP_PF_NONE; + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); clock->ptp_info.pin_config[i].chan = 0; } -- cgit From 7d0314b11cdd92bca8b89684c06953bf114605fc Mon Sep 17 00:00:00 2001 From: Ron Diskin Date: Sun, 5 Apr 2020 13:58:40 +0300 Subject: net/mlx5e: Modify uplink state on interface up/down When setting the PF interface up/down, notify the firmware to update uplink state via MODIFY_VPORT_STATE, when E-Switch is enabled. This behavior will prevent sending traffic out on uplink port when PF is down, such as sending traffic from a VF interface which is still up. Currently when calling mlx5e_open/close(), the driver only sends PAOS command to notify the firmware to set the physical port state to up/down, however, it is not sufficient. When VF is in "auto" state, it follows the uplink state, which was not updated on mlx5e_open/close() before this patch. When switchdev mode is enabled and uplink representor is first enabled, set the uplink port state value back to its FW default "AUTO". Fixes: 63bfd399de55 ("net/mlx5e: Send PAOS command on interface up/down") Signed-off-by: Ron Diskin Reviewed-by: Roi Dayan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 25 ++++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 16 +++++++++------ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 1 + 5 files changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 31f9ecae98df..07fdbea7ea13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3069,6 +3069,25 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; } +static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev, + enum mlx5_port_status state) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int vport_admin_state; + + mlx5_set_port_admin_status(mdev, state); + + if (!MLX5_ESWITCH_MANAGER(mdev) || mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS) + return; + + if (state == MLX5_PORT_UP) + vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO; + else + vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state); +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3101,7 +3120,7 @@ int mlx5e_open(struct net_device *netdev) mutex_lock(&priv->state_lock); err = mlx5e_open_locked(netdev); if (!err) - mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP); + mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP); mutex_unlock(&priv->state_lock); return err; @@ -3135,7 +3154,7 @@ int mlx5e_close(struct net_device *netdev) return -ENODEV; mutex_lock(&priv->state_lock); - mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN); + mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -5182,7 +5201,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) /* Marking the link as currently not needed by the Driver */ if (!netif_running(netdev)) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN); mlx5e_set_netdev_mtu_boundaries(priv); mlx5e_set_dev_port_mtu(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8c294ab43f90..9519a61bd8ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1081,6 +1081,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5e_rep_tc_enable(priv); + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d9376627584e..71d01143c455 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1826,6 +1826,8 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int opmod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT; + int other_vport = 1; int err = 0; if (!ESW_ALLOWED(esw)) @@ -1833,15 +1835,17 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, if (IS_ERR(evport)) return PTR_ERR(evport); + if (vport == MLX5_VPORT_UPLINK) { + opmod = MLX5_VPORT_STATE_OP_MOD_UPLINK; + other_vport = 0; + vport = 0; + } mutex_lock(&esw->state_lock); - err = mlx5_modify_vport_admin_state(esw->dev, - MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, 1, link_state); + err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state); if (err) { - mlx5_core_warn(esw->dev, - "Failed to set vport %d link state, err = %d", - vport, err); + mlx5_core_warn(esw->dev, "Failed to set vport %d link state, opmod = %d, err = %d", + vport, opmod, err); goto unlock; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a5175e98c0b3..5785596f13f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -680,6 +680,8 @@ static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { r static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) { return ERR_PTR(-EOPNOTSUPP); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 073b79eacc99..1340e02b14ef 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4381,6 +4381,7 @@ struct mlx5_ifc_query_vport_state_out_bits { enum { MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, + MLX5_VPORT_STATE_OP_MOD_UPLINK = 0x2, }; struct mlx5_ifc_arm_monitor_counter_in_bits { -- cgit From 350a63249d270b1f5bd05c7e2a24cd8de0f9db20 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Wed, 15 Jul 2020 11:46:30 +0300 Subject: net/mlx5e: Fix kernel crash when setting vf VLANID on a VF dev After the cited commit, function 'mlx5_eswitch_set_vport_vlan' started to acquire esw->state_lock. However, esw is not defined for VF devices, hence attempting to set vf VLANID on a VF dev will cause a kernel panic. Fix it by moving up the (redundant) esw validation from function '__mlx5_eswitch_set_vport_vlan' since the rest of the callers now have and use a valid esw. For example with vf device eth4: # ip link set dev eth4 vf 0 vlan 0 Trace of the panic: [ 411.409842] BUG: unable to handle page fault for address: 00000000000011b8 [ 411.449745] #PF: supervisor read access in kernel mode [ 411.452348] #PF: error_code(0x0000) - not-present page [ 411.454938] PGD 80000004189c9067 P4D 80000004189c9067 PUD 41899a067 PMD 0 [ 411.458382] Oops: 0000 [#1] SMP PTI [ 411.460268] CPU: 4 PID: 5711 Comm: ip Not tainted 5.8.0-rc4_for_upstream_min_debug_2020_07_08_22_04 #1 [ 411.462447] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 411.464158] RIP: 0010:__mutex_lock+0x4e/0x940 [ 411.464928] Code: fd 41 54 49 89 f4 41 52 53 89 d3 48 83 ec 70 44 8b 1d ee 03 b0 01 65 48 8b 04 25 28 00 00 00 48 89 45 c8 31 c0 45 85 db 75 0a <48> 3b 7f 60 0f 85 7e 05 00 00 49 8d 45 68 41 56 41 b8 01 00 00 00 [ 411.467678] RSP: 0018:ffff88841fcd74b0 EFLAGS: 00010246 [ 411.468562] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 411.469715] RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000001158 [ 411.470812] RBP: ffff88841fcd7550 R08: ffffffffa00fa1ce R09: 0000000000000000 [ 411.471835] R10: ffff88841fcd7570 R11: 0000000000000000 R12: 0000000000000002 [ 411.472862] R13: 0000000000001158 R14: ffffffffa00fa1ce R15: 0000000000000000 [ 411.474004] FS: 00007faee7ca6b80(0000) GS:ffff88846fc00000(0000) knlGS:0000000000000000 [ 411.475237] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 411.476129] CR2: 00000000000011b8 CR3: 000000041909c006 CR4: 0000000000360ea0 [ 411.477260] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 411.478340] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 411.479332] Call Trace: [ 411.479760] ? __nla_validate_parse.part.6+0x57/0x8f0 [ 411.482825] ? mlx5_eswitch_set_vport_vlan+0x3e/0xa0 [mlx5_core] [ 411.483804] mlx5_eswitch_set_vport_vlan+0x3e/0xa0 [mlx5_core] [ 411.484733] mlx5e_set_vf_vlan+0x41/0x50 [mlx5_core] [ 411.485545] do_setlink+0x613/0x1000 [ 411.486165] __rtnl_newlink+0x53d/0x8c0 [ 411.486791] ? mark_held_locks+0x49/0x70 [ 411.487429] ? __lock_acquire+0x8fe/0x1eb0 [ 411.488085] ? rcu_read_lock_sched_held+0x52/0x60 [ 411.488998] ? kmem_cache_alloc_trace+0x16d/0x2d0 [ 411.489759] rtnl_newlink+0x47/0x70 [ 411.490357] rtnetlink_rcv_msg+0x24e/0x450 [ 411.490978] ? netlink_deliver_tap+0x92/0x3d0 [ 411.491631] ? validate_linkmsg+0x330/0x330 [ 411.492262] netlink_rcv_skb+0x47/0x110 [ 411.492852] netlink_unicast+0x1ac/0x270 [ 411.493551] netlink_sendmsg+0x336/0x450 [ 411.494209] sock_sendmsg+0x30/0x40 [ 411.494779] ____sys_sendmsg+0x1dd/0x1f0 [ 411.495378] ? copy_msghdr_from_user+0x5c/0x90 [ 411.496082] ___sys_sendmsg+0x87/0xd0 [ 411.496683] ? lock_acquire+0xb9/0x3a0 [ 411.497322] ? lru_cache_add+0x5/0x170 [ 411.497944] ? find_held_lock+0x2d/0x90 [ 411.498568] ? handle_mm_fault+0xe46/0x18c0 [ 411.499205] ? __sys_sendmsg+0x51/0x90 [ 411.499784] __sys_sendmsg+0x51/0x90 [ 411.500341] do_syscall_64+0x59/0x2e0 [ 411.500938] ? asm_exc_page_fault+0x8/0x30 [ 411.501609] ? rcu_read_lock_sched_held+0x52/0x60 [ 411.502350] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 411.503093] RIP: 0033:0x7faee73b85a7 [ 411.503654] Code: Bad RIP value. Fixes: 0e18134f4f9f ("net/mlx5e: Eswitch, use state_lock to synchronize vlan change") Signed-off-by: Alaa Hleihel Reviewed-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 71d01143c455..43005caff09e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1887,8 +1887,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); int err = 0; - if (!ESW_ALLOWED(esw)) - return -EPERM; if (IS_ERR(evport)) return PTR_ERR(evport); if (vlan > 4095 || qos > 7) @@ -1916,6 +1914,9 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u8 set_flags = 0; int err; + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; -- cgit From e911e99a0770f760377c263bc7bac1b1593c6147 Mon Sep 17 00:00:00 2001 From: Rustam Kovhaev Date: Mon, 27 Jul 2020 23:42:17 -0700 Subject: usb: hso: check for return value in hso_serial_common_create() in case of an error tty_register_device_attr() returns ERR_PTR(), add IS_ERR() check Reported-and-tested-by: syzbot+67b2bd0e34f952d0321e@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=67b2bd0e34f952d0321e Signed-off-by: Rustam Kovhaev Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 5f123a8cf68e..d2fdb5430d27 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2261,12 +2261,14 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, minor = get_free_serial_index(); if (minor < 0) - goto exit; + goto exit2; /* register our minor number */ serial->parent->dev = tty_port_register_device_attr(&serial->port, tty_drv, minor, &serial->parent->interface->dev, serial->parent, hso_serial_dev_groups); + if (IS_ERR(serial->parent->dev)) + goto exit2; /* fill in specific data for later use */ serial->minor = minor; @@ -2311,6 +2313,7 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, return 0; exit: hso_serial_tty_unregister(serial); +exit2: hso_serial_common_free(serial); return -1; } -- cgit From 8d8e95fd6d69d774013f51e5f2ee10c6e6d1fc14 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Jul 2020 14:10:29 +0200 Subject: net: lan78xx: add missing endpoint sanity check Add the missing endpoint sanity check to prevent a NULL-pointer dereference should a malicious device lack the expected endpoints. Note that the driver has a broken endpoint-lookup helper, lan78xx_get_endpoints(), which can end up accepting interfaces in an altsetting without endpoints as long as *some* altsetting has a bulk-in and a bulk-out endpoint. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Cc: Woojung.Huh@microchip.com Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index eccbf4cd7149..d7162690e3f3 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3759,6 +3759,11 @@ static int lan78xx_probe(struct usb_interface *intf, netdev->max_mtu = MAX_SINGLE_PACKET_SIZE; netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER); + if (intf->cur_altsetting->desc.bNumEndpoints < 3) { + ret = -ENODEV; + goto out3; + } + dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0; dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1; dev->ep_intr = (intf->cur_altsetting)->endpoint + 2; -- cgit From 63634aa679ba8b5e306ad0727120309ae6ba8a8e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Jul 2020 14:10:30 +0200 Subject: net: lan78xx: fix transfer-buffer memory leak The interrupt URB transfer-buffer was never freed on disconnect or after probe errors. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Cc: Woojung.Huh@microchip.com Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index d7162690e3f3..ee062b27cfa7 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3788,6 +3788,7 @@ static int lan78xx_probe(struct usb_interface *intf, usb_fill_int_urb(dev->urb_intr, dev->udev, dev->pipe_intr, buf, maxp, intr_complete, dev, period); + dev->urb_intr->transfer_flags |= URB_FREE_BUFFER; } } -- cgit From ea060b352654a8de1e070140d25fe1b7e4d50310 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Jul 2020 14:10:31 +0200 Subject: net: lan78xx: replace bogus endpoint lookup Drop the bogus endpoint-lookup helper which could end up accepting interfaces based on endpoints belonging to unrelated altsettings. Note that the returned bulk pipes and interrupt endpoint descriptor were never actually used. Instead the bulk-endpoint numbers are hardcoded to 1 and 2 (matching the specification), while the interrupt- endpoint descriptor was assumed to be the third descriptor created by USB core. Try to bring some order to this by dropping the bogus lookup helper and adding the missing endpoint sanity checks while keeping the interrupt- descriptor assumption for now. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 117 ++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 87 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ee062b27cfa7..442507f25aad 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -377,10 +377,6 @@ struct lan78xx_net { struct tasklet_struct bh; struct delayed_work wq; - struct usb_host_endpoint *ep_blkin; - struct usb_host_endpoint *ep_blkout; - struct usb_host_endpoint *ep_intr; - int msg_enable; struct urb *urb_intr; @@ -2860,78 +2856,12 @@ lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -static int -lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf) -{ - int tmp; - struct usb_host_interface *alt = NULL; - struct usb_host_endpoint *in = NULL, *out = NULL; - struct usb_host_endpoint *status = NULL; - - for (tmp = 0; tmp < intf->num_altsetting; tmp++) { - unsigned ep; - - in = NULL; - out = NULL; - status = NULL; - alt = intf->altsetting + tmp; - - for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { - struct usb_host_endpoint *e; - int intr = 0; - - e = alt->endpoint + ep; - switch (e->desc.bmAttributes) { - case USB_ENDPOINT_XFER_INT: - if (!usb_endpoint_dir_in(&e->desc)) - continue; - intr = 1; - /* FALLTHROUGH */ - case USB_ENDPOINT_XFER_BULK: - break; - default: - continue; - } - if (usb_endpoint_dir_in(&e->desc)) { - if (!intr && !in) - in = e; - else if (intr && !status) - status = e; - } else { - if (!out) - out = e; - } - } - if (in && out) - break; - } - if (!alt || !in || !out) - return -EINVAL; - - dev->pipe_in = usb_rcvbulkpipe(dev->udev, - in->desc.bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK); - dev->pipe_out = usb_sndbulkpipe(dev->udev, - out->desc.bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK); - dev->ep_intr = status; - - return 0; -} - static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) { struct lan78xx_priv *pdata = NULL; int ret; int i; - ret = lan78xx_get_endpoints(dev, intf); - if (ret) { - netdev_warn(dev->net, "lan78xx_get_endpoints failed: %d\n", - ret); - return ret; - } - dev->data[0] = (unsigned long)kzalloc(sizeof(*pdata), GFP_KERNEL); pdata = (struct lan78xx_priv *)(dev->data[0]); @@ -3700,6 +3630,7 @@ static void lan78xx_stat_monitor(struct timer_list *t) static int lan78xx_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_host_endpoint *ep_blkin, *ep_blkout, *ep_intr; struct lan78xx_net *dev; struct net_device *netdev; struct usb_device *udev; @@ -3748,6 +3679,34 @@ static int lan78xx_probe(struct usb_interface *intf, mutex_init(&dev->stats.access_lock); + if (intf->cur_altsetting->desc.bNumEndpoints < 3) { + ret = -ENODEV; + goto out2; + } + + dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE); + ep_blkin = usb_pipe_endpoint(udev, dev->pipe_in); + if (!ep_blkin || !usb_endpoint_is_bulk_in(&ep_blkin->desc)) { + ret = -ENODEV; + goto out2; + } + + dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE); + ep_blkout = usb_pipe_endpoint(udev, dev->pipe_out); + if (!ep_blkout || !usb_endpoint_is_bulk_out(&ep_blkout->desc)) { + ret = -ENODEV; + goto out2; + } + + ep_intr = &intf->cur_altsetting->endpoint[2]; + if (!usb_endpoint_is_int_in(&ep_intr->desc)) { + ret = -ENODEV; + goto out2; + } + + dev->pipe_intr = usb_rcvintpipe(dev->udev, + usb_endpoint_num(&ep_intr->desc)); + ret = lan78xx_bind(dev, intf); if (ret < 0) goto out2; @@ -3759,23 +3718,7 @@ static int lan78xx_probe(struct usb_interface *intf, netdev->max_mtu = MAX_SINGLE_PACKET_SIZE; netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER); - if (intf->cur_altsetting->desc.bNumEndpoints < 3) { - ret = -ENODEV; - goto out3; - } - - dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0; - dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1; - dev->ep_intr = (intf->cur_altsetting)->endpoint + 2; - - dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE); - dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE); - - dev->pipe_intr = usb_rcvintpipe(dev->udev, - dev->ep_intr->desc.bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK); - period = dev->ep_intr->desc.bInterval; - + period = ep_intr->desc.bInterval; maxp = usb_maxpacket(dev->udev, dev->pipe_intr, 0); buf = kmalloc(maxp, GFP_KERNEL); if (buf) { -- cgit From 19016d93bfc335f0c158c0d9e3b9d06c4dd53d39 Mon Sep 17 00:00:00 2001 From: René van Dorst Date: Thu, 23 Jul 2020 20:07:10 +0100 Subject: net: ethernet: mtk_eth_soc: Always call mtk_gmac0_rgmii_adjust() for mt7623 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify mtk_gmac0_rgmii_adjust() so it can always be called. mtk_gmac0_rgmii_adjust() sets-up the TRGMII clocks. Signed-off-by: René van Dorst Signed-off-By: David Woodhouse Tested-by: Frank Wunderlich Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f6a1f8666f95..85735d32ecb0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -171,11 +171,21 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth, return 0; } -static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed) +static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, + phy_interface_t interface, int speed) { u32 val; int ret; + if (interface == PHY_INTERFACE_MODE_TRGMII) { + mtk_w32(eth, TRGMII_MODE, INTF_MODE); + val = 500000000; + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + if (ret) + dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); + return; + } + val = (speed == SPEED_1000) ? INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100; mtk_w32(eth, val, INTF_MODE); @@ -262,10 +272,9 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, state->interface)) goto err_phy; } else { - if (state->interface != - PHY_INTERFACE_MODE_TRGMII) - mtk_gmac0_rgmii_adjust(mac->hw, - state->speed); + mtk_gmac0_rgmii_adjust(mac->hw, + state->interface, + state->speed); /* mt7623_pad_clk_setup */ for (i = 0 ; i < NUM_TRGMII_CTRL; i++) -- cgit From 1748f6a2cbc4694523f16da1c892b59861045b9d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 24 Jul 2020 20:12:53 +1000 Subject: rhashtable: Fix unprotected RCU dereference in __rht_ptr The rcu_dereference call in rht_ptr_rcu is completely bogus because we've already dereferenced the value in __rht_ptr and operated on it. This causes potential double readings which could be fatal. The RCU dereference must occur prior to the comparison in __rht_ptr. This patch changes the order of RCU dereference so that it is done first and the result is then fed to __rht_ptr. The RCU marking changes have been minimised using casts which will be removed in a follow-up patch. Fixes: ba6306e3f648 ("rhashtable: Remove RCU marking from...") Reported-by: "Gong, Sishuai" Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d3432ee65de7..b8feb5da7c5a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -349,11 +349,11 @@ static inline void rht_unlock(struct bucket_table *tbl, local_bh_enable(); } -static inline struct rhash_head __rcu *__rht_ptr( - struct rhash_lock_head *const *bkt) +static inline struct rhash_head *__rht_ptr( + struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt) { - return (struct rhash_head __rcu *) - ((unsigned long)*bkt & ~BIT(0) ?: + return (struct rhash_head *) + ((unsigned long)p & ~BIT(0) ?: (unsigned long)RHT_NULLS_MARKER(bkt)); } @@ -365,25 +365,26 @@ static inline struct rhash_head __rcu *__rht_ptr( * access is guaranteed, such as when destroying the table. */ static inline struct rhash_head *rht_ptr_rcu( - struct rhash_lock_head *const *bkt) + struct rhash_lock_head *const *p) { - struct rhash_head __rcu *p = __rht_ptr(bkt); - - return rcu_dereference(p); + struct rhash_lock_head __rcu *const *bkt = (void *)p; + return __rht_ptr(rcu_dereference(*bkt), bkt); } static inline struct rhash_head *rht_ptr( - struct rhash_lock_head *const *bkt, + struct rhash_lock_head *const *p, struct bucket_table *tbl, unsigned int hash) { - return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash); + struct rhash_lock_head __rcu *const *bkt = (void *)p; + return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); } static inline struct rhash_head *rht_ptr_exclusive( - struct rhash_lock_head *const *bkt) + struct rhash_lock_head *const *p) { - return rcu_dereference_protected(__rht_ptr(bkt), 1); + struct rhash_lock_head __rcu *const *bkt = (void *)p; + return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); } static inline void rht_assign_locked(struct rhash_lock_head **bkt, -- cgit From ce9b362bf6db51a083c4221ef0f93c16cfb1facf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 24 Jul 2020 20:14:34 +1000 Subject: rhashtable: Restore RCU marking on rhash_lock_head This patch restores the RCU marking on bucket_table->buckets as it really does need RCU protection. Its removal had led to a fatal bug. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 56 ++++++++++++++++++++-------------------------- lib/rhashtable.c | 35 +++++++++++++---------------- 2 files changed, 40 insertions(+), 51 deletions(-) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b8feb5da7c5a..68dab3e08aad 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -84,7 +84,7 @@ struct bucket_table { struct lockdep_map dep_map; - struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp; + struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /* @@ -261,13 +261,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **__rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -284,21 +283,21 @@ struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_lock_head *const *rht_bucket( +static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_lock_head **rht_bucket_var( +static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_lock_head **rht_bucket_insert( +static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : @@ -325,7 +324,7 @@ static inline struct rhash_lock_head **rht_bucket_insert( */ static inline void rht_lock(struct bucket_table *tbl, - struct rhash_lock_head **bkt) + struct rhash_lock_head __rcu **bkt) { local_bh_disable(); bit_spin_lock(0, (unsigned long *)bkt); @@ -333,7 +332,7 @@ static inline void rht_lock(struct bucket_table *tbl, } static inline void rht_lock_nested(struct bucket_table *tbl, - struct rhash_lock_head **bucket, + struct rhash_lock_head __rcu **bucket, unsigned int subclass) { local_bh_disable(); @@ -342,7 +341,7 @@ static inline void rht_lock_nested(struct bucket_table *tbl, } static inline void rht_unlock(struct bucket_table *tbl, - struct rhash_lock_head **bkt) + struct rhash_lock_head __rcu **bkt) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); @@ -365,48 +364,41 @@ static inline struct rhash_head *__rht_ptr( * access is guaranteed, such as when destroying the table. */ static inline struct rhash_head *rht_ptr_rcu( - struct rhash_lock_head *const *p) + struct rhash_lock_head __rcu *const *bkt) { - struct rhash_lock_head __rcu *const *bkt = (void *)p; return __rht_ptr(rcu_dereference(*bkt), bkt); } static inline struct rhash_head *rht_ptr( - struct rhash_lock_head *const *p, + struct rhash_lock_head __rcu *const *bkt, struct bucket_table *tbl, unsigned int hash) { - struct rhash_lock_head __rcu *const *bkt = (void *)p; return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); } static inline struct rhash_head *rht_ptr_exclusive( - struct rhash_lock_head *const *p) + struct rhash_lock_head __rcu *const *bkt) { - struct rhash_lock_head __rcu *const *bkt = (void *)p; return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); } -static inline void rht_assign_locked(struct rhash_lock_head **bkt, +static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { - struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; - if (rht_is_a_nulls(obj)) obj = NULL; - rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0))); + rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); } static inline void rht_assign_unlock(struct bucket_table *tbl, - struct rhash_lock_head **bkt, + struct rhash_lock_head __rcu **bkt, struct rhash_head *obj) { - struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; - if (rht_is_a_nulls(obj)) obj = NULL; lock_map_release(&tbl->dep_map); - rcu_assign_pointer(*p, obj); + rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); local_bh_enable(); @@ -594,7 +586,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct rhash_lock_head *const *bkt; + struct rhash_lock_head __rcu *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -710,7 +702,7 @@ static inline void *__rhashtable_insert_fast( .ht = ht, .key = key, }; - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; @@ -996,7 +988,7 @@ static inline int __rhashtable_remove_fast_one( struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; @@ -1148,7 +1140,7 @@ static inline int __rhashtable_replace_fast( struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; unsigned int hash; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9f6890aedd1a..c949c1e3b87c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -31,7 +31,7 @@ union nested_table { union nested_table __rcu *table; - struct rhash_lock_head *bucket; + struct rhash_lock_head __rcu *bucket; }; static u32 head_hashfn(struct rhashtable *ht, @@ -222,7 +222,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, } static int rhashtable_rehash_one(struct rhashtable *ht, - struct rhash_lock_head **bkt, + struct rhash_lock_head __rcu **bkt, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); @@ -275,7 +275,7 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash); + struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); int err; if (!bkt) @@ -485,7 +485,7 @@ fail: } static void *rhashtable_lookup_one(struct rhashtable *ht, - struct rhash_lock_head **bkt, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { @@ -535,12 +535,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, return ERR_PTR(-ENOENT); } -static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, - struct rhash_lock_head **bkt, - struct bucket_table *tbl, - unsigned int hash, - struct rhash_head *obj, - void *data) +static struct bucket_table *rhashtable_insert_one( + struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, + struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, + void *data) { struct bucket_table *new_tbl; struct rhash_head *head; @@ -591,7 +589,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, { struct bucket_table *new_tbl; struct bucket_table *tbl; - struct rhash_lock_head **bkt; + struct rhash_lock_head __rcu **bkt; unsigned int hash; void *data; @@ -1173,8 +1171,8 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **__rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); @@ -1202,10 +1200,10 @@ struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(__rht_bucket_nested); -struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash) { - static struct rhash_lock_head *rhnull; + static struct rhash_lock_head __rcu *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); @@ -1213,9 +1211,8 @@ struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(rht_bucket_nested); -struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); -- cgit From 3cab8c65525920f00d8f4997b3e9bb73aecb3a8e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Jul 2020 16:15:43 -0700 Subject: mlx4: disable device on shutdown It appears that not disabling a PCI device on .shutdown may lead to a Hardware Error with particular (perhaps buggy) BIOS versions: mlx4_en: eth0: Close port called mlx4_en 0000:04:00.0: removed PHC reboot: Restarting system {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 {1}[Hardware Error]: event severity: fatal {1}[Hardware Error]: Error 0, type: fatal {1}[Hardware Error]: section_type: PCIe error {1}[Hardware Error]: port_type: 4, root port {1}[Hardware Error]: version: 1.16 {1}[Hardware Error]: command: 0x4010, status: 0x0143 {1}[Hardware Error]: device_id: 0000:00:02.2 {1}[Hardware Error]: slot: 0 {1}[Hardware Error]: secondary_bus: 0x04 {1}[Hardware Error]: vendor_id: 0x8086, device_id: 0x2f06 {1}[Hardware Error]: class_code: 000604 {1}[Hardware Error]: bridge: secondary_status: 0x2000, control: 0x0003 {1}[Hardware Error]: aer_uncor_status: 0x00100000, aer_uncor_mask: 0x00000000 {1}[Hardware Error]: aer_uncor_severity: 0x00062030 {1}[Hardware Error]: TLP Header: 40000018 040000ff 791f4080 00000000 [hw error repeats] Kernel panic - not syncing: Fatal hardware error! CPU: 0 PID: 2189 Comm: reboot Kdump: loaded Not tainted 5.6.x-blabla #1 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 05/05/2017 Fix the mlx4 driver. This is a very similar problem to what had been fixed in: commit 0d98ba8d70b0 ("scsi: hpsa: disable device during shutdown") to address https://bugzilla.kernel.org/show_bug.cgi?id=199779. Fixes: 2ba5fbd62b25 ("net/mlx4_core: Handle AER flow properly") Reported-by: Jake Lawrence Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 3d9aa7da95e9..2d3e45780719 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4356,12 +4356,14 @@ end: static void mlx4_shutdown(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; mlx4_info(persist->dev, "mlx4_shutdown was called\n"); mutex_lock(&persist->interface_state_mutex); if (persist->interface_state & MLX4_INTERFACE_STATE_UP) mlx4_unload_one(pdev); mutex_unlock(&persist->interface_state_mutex); + mlx4_pci_disable_device(dev); } static const struct pci_error_handlers mlx4_err_handler = { -- cgit From 948a66338f44c16f52c0f03f6ad81a6f59eb5604 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Jul 2020 10:13:52 +0530 Subject: octeontx2-pf: Fix reset_task bugs Two bugs exist in the code related to reset_task in PF driver one is the missing protection against network stack ndo_open and ndo_close. Other one is the missing cancel_work. This patch fixes those problems. Fixes: 4ff7d1488a84 ("octeontx2-pf: Error handling support") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 64786568af0d..75a8c407e815 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1730,10 +1730,12 @@ static void otx2_reset_task(struct work_struct *work) if (!netif_running(pf->netdev)) return; + rtnl_lock(); otx2_stop(pf->netdev); pf->reset_count++; otx2_open(pf->netdev); netif_trans_update(pf->netdev); + rtnl_unlock(); } static const struct net_device_ops otx2_netdev_ops = { @@ -2111,6 +2113,7 @@ static void otx2_remove(struct pci_dev *pdev) pf = netdev_priv(netdev); + cancel_work_sync(&pf->reset_task); /* Disable link notifications */ otx2_cgx_config_linkevents(pf, false); -- cgit From c0376f473c5cc2ef94f8e1e055d173293cc3698c Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Jul 2020 10:13:53 +0530 Subject: octeontx2-pf: cancel reset_task work During driver exit cancel the queued reset_task work in VF driver. Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index f4227517dc8e..c1c263d1ac2e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -617,6 +617,7 @@ static void otx2vf_remove(struct pci_dev *pdev) vf = netdev_priv(netdev); + cancel_work_sync(&vf->reset_task); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); -- cgit From ed543f5c6a988d8a863d2436794230cef2c82389 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Jul 2020 10:13:54 +0530 Subject: octeontx2-pf: Unregister netdev at driver remove Added unregister_netdev in the driver remove function. Generally unregister_netdev is called after disabling all the device interrupts but here it is called before disabling device mailbox interrupts. The reason behind this is VF needs mailbox interrupt to communicate with its PF to clean up its resources during otx2_stop. otx2_stop disables packet I/O and queue interrupts first and by using mailbox interrupt communicates to PF to free VF resources. Hence this patch calls unregister_device just before disabling mailbox interrupts. Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index c1c263d1ac2e..92a3db69a6cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -618,6 +618,7 @@ static void otx2vf_remove(struct pci_dev *pdev) vf = netdev_priv(netdev); cancel_work_sync(&vf->reset_task); + unregister_netdev(netdev); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); -- cgit From 706ec919164622ff5ce822065472d0f30a9e9dd2 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Sat, 25 Jul 2020 16:02:18 +0800 Subject: ipv6: Fix nexthop refcnt leak when creating ipv6 route info ip6_route_info_create() invokes nexthop_get(), which increases the refcount of the "nh". When ip6_route_info_create() returns, local variable "nh" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one exception handling path of ip6_route_info_create(). When nexthops can not be used with source routing, the function forgets to decrease the refcnt increased by nexthop_get(), causing a refcnt leak. Fix this issue by pulling up the error source routing handling when nexthops can not be used with source routing. Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f3279810d765..4c36bd0c7930 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3685,14 +3685,14 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->fib6_src.plen = cfg->fc_src_len; #endif if (nh) { - if (!nexthop_get(nh)) { - NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); - goto out; - } if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); goto out; } + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + goto out; + } rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); } else { -- cgit From 302d201b5cdf6f4781ee6cd9862f377f975d6c43 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 25 Jul 2020 15:06:47 +0200 Subject: bareudp: forbid mixing IP and MPLS in multiproto mode In multiproto mode, bareudp_xmit() accepts sending multicast MPLS and IPv6 packets regardless of the bareudp ethertype. In practice, this let an IP tunnel send multicast MPLS packets, or an MPLS tunnel send IPv6 packets. We need to restrict the test further, so that the multiproto mode only enables * IPv6 for IPv4 tunnels, * or multicast MPLS for unicast MPLS tunnels. To improve clarity, the protocol validation is moved to its own function, where each logical test has its own condition. v2: s/ntohs/htons/ Fixes: 4b5f67232d95 ("net: Special handling for IP & MPLS.") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 3dd46cd55114..88e7900853db 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -407,19 +407,34 @@ free_dst: return err; } +static bool bareudp_proto_valid(struct bareudp_dev *bareudp, __be16 proto) +{ + if (bareudp->ethertype == proto) + return true; + + if (!bareudp->multi_proto_mode) + return false; + + if (bareudp->ethertype == htons(ETH_P_MPLS_UC) && + proto == htons(ETH_P_MPLS_MC)) + return true; + + if (bareudp->ethertype == htons(ETH_P_IP) && + proto == htons(ETH_P_IPV6)) + return true; + + return false; +} + static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) { struct bareudp_dev *bareudp = netdev_priv(dev); struct ip_tunnel_info *info = NULL; int err; - if (skb->protocol != bareudp->ethertype) { - if (!bareudp->multi_proto_mode || - (skb->protocol != htons(ETH_P_MPLS_MC) && - skb->protocol != htons(ETH_P_IPV6))) { - err = -EINVAL; - goto tx_error; - } + if (!bareudp_proto_valid(bareudp, skb->protocol)) { + err = -EINVAL; + goto tx_error; } info = skb_tunnel_info(skb); -- cgit From 1ed06dbc213e1408ab60faa3ac0c0855a3ddc52d Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Tue, 28 Jul 2020 22:35:52 +0530 Subject: Documentation: bareudp: Corrected description of bareudp module. Removed redundant words. Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Martin Varghese Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst index ff406563ea88..b9d04ee6dac1 100644 --- a/Documentation/networking/bareudp.rst +++ b/Documentation/networking/bareudp.rst @@ -8,9 +8,8 @@ There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. -The Bareudp tunnel module provides a generic L3 encapsulation tunnelling -support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside -a UDP tunnel. +The Bareudp tunnel module provides a generic L3 encapsulation support for +tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. Special Handling ---------------- -- cgit From b5141915b5aec3b29a63db869229e3741ebce258 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 11:34:36 +0300 Subject: vxlan: Ensure FDB dump is performed under RCU The commit cited below removed the RCU read-side critical section from rtnl_fdb_dump() which means that the ndo_fdb_dump() callback is invoked without RCU protection. This results in the following warning [1] in the VXLAN driver, which relied on the callback being invoked from an RCU read-side critical section. Fix this by calling rcu_read_lock() in the VXLAN driver, as already done in the bridge driver. [1] WARNING: suspicious RCU usage 5.8.0-rc4-custom-01521-g481007553ce6 #29 Not tainted ----------------------------- drivers/net/vxlan.c:1379 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by bridge/166: #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0xea/0x1090 stack backtrace: CPU: 1 PID: 166 Comm: bridge Not tainted 5.8.0-rc4-custom-01521-g481007553ce6 #29 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x100/0x184 lockdep_rcu_suspicious+0x153/0x15d vxlan_fdb_dump+0x51e/0x6d0 rtnl_fdb_dump+0x4dc/0xad0 netlink_dump+0x540/0x1090 __netlink_dump_start+0x695/0x950 rtnetlink_rcv_msg+0x802/0xbd0 netlink_rcv_skb+0x17a/0x480 rtnetlink_rcv+0x22/0x30 netlink_unicast+0x5ae/0x890 netlink_sendmsg+0x98a/0xf40 __sys_sendto+0x279/0x3b0 __x64_sys_sendto+0xe6/0x1a0 do_syscall_64+0x54/0xa0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fe14fa2ade0 Code: Bad RIP value. RSP: 002b:00007fff75bb5b88 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00005614b1ba0020 RCX: 00007fe14fa2ade0 RDX: 000000000000011c RSI: 00007fff75bb5b90 RDI: 0000000000000003 RBP: 00007fff75bb5b90 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00005614b1b89160 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 89d85dcb200e..5efe1e28f270 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1376,6 +1376,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; + rcu_read_lock(); hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { struct vxlan_rdst *rd; @@ -1387,8 +1388,10 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, NULL); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto out; + } skip_nh: *idx += 1; continue; @@ -1403,12 +1406,15 @@ skip_nh: cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, rd); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto out; + } skip: *idx += 1; } } + rcu_read_unlock(); } out: return err; -- cgit From 83f3522860f702748143e022f1a546547314c715 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 11:37:13 +0300 Subject: ipv4: Silence suspicious RCU usage warning fib_trie_unmerge() is called with RTNL held, but not from an RCU read-side critical section. This leads to the following warning [1] when the FIB alias list in a leaf is traversed with hlist_for_each_entry_rcu(). Since the function is always called with RTNL held and since modification of the list is protected by RTNL, simply use hlist_for_each_entry() and silence the warning. [1] WARNING: suspicious RCU usage 5.8.0-rc4-custom-01520-gc1f937f3f83b #30 Not tainted ----------------------------- net/ipv4/fib_trie.c:1867 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/164: #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0 stack backtrace: CPU: 0 PID: 164 Comm: ip Not tainted 5.8.0-rc4-custom-01520-gc1f937f3f83b #30 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x100/0x184 lockdep_rcu_suspicious+0x153/0x15d fib_trie_unmerge+0x608/0xdb0 fib_unmerge+0x44/0x360 fib4_rule_configure+0xc8/0xad0 fib_nl_newrule+0x37a/0x1dd0 rtnetlink_rcv_msg+0x4f7/0xbd0 netlink_rcv_skb+0x17a/0x480 rtnetlink_rcv+0x22/0x30 netlink_unicast+0x5ae/0x890 netlink_sendmsg+0x98a/0xf40 ____sys_sendmsg+0x879/0xa00 ___sys_sendmsg+0x122/0x190 __sys_sendmsg+0x103/0x1d0 __x64_sys_sendmsg+0x7d/0xb0 do_syscall_64+0x54/0xa0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fc80a234e97 Code: Bad RIP value. RSP: 002b:00007ffef8b66798 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc80a234e97 RDX: 0000000000000000 RSI: 00007ffef8b66800 RDI: 0000000000000003 RBP: 000000005f141b1c R08: 0000000000000001 R09: 0000000000000000 R10: 00007fc80a2a8ac0 R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000000 R14: 00007ffef8b67008 R15: 0000556fccb10020 Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 248f1c1959a6..3c65f71d0e82 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1864,7 +1864,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) while ((l = leaf_walk_rcu(&tp, key)) != NULL) { struct key_vector *local_l = NULL, *local_tp; - hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + hlist_for_each_entry(fa, &l->leaf, fa_list) { struct fib_alias *new_fa; if (local_tb->tb_id != fa->tb_id) -- cgit From 89ab53313543b18640eb5d25a1e74bf9eee889b0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 12:26:43 +0300 Subject: mlxsw: spectrum_router: Allow programming link-local host routes Cited commit added the ability to program link-local prefix routes to the ASIC so that relevant packets are routed and trapped correctly. However, host routes were not included in the change and thus not programmed to the ASIC. This can result in packets being trapped via an external route trap instead of a local route trap as in IPv4. Fix this by programming all the link-local routes to the ASIC. Fixes: 10d3757fcb07 ("mlxsw: spectrum_router: Allow programming link-local prefix routes") Reported-by: Alex Veber Tested-by: Alex Veber Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 019ed503aadf..bd4803074776 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5001,15 +5001,6 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) { - /* Packets with link-local destination IP arriving to the router - * are trapped to the CPU, so no need to program specific routes - * for them. Only allow prefix routes (usually one fe80::/64) so - * that packets are trapped for the right reason. - */ - if ((ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL) && - (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))) - return true; - /* Multicast routes aren't supported, so ignore them. Neighbour * Discovery packets are specifically trapped. */ -- cgit From ec4f5b3617049d474b3263792785b638640f2dbe Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 12:26:44 +0300 Subject: mlxsw: spectrum: Use different trap group for externally routed packets Cited commit mistakenly removed the trap group for externally routed packets (e.g., via the management interface) and grouped locally routed and externally routed packet traps under the same group, thereby subjecting them to the same policer. This can result in problems, for example, when FRR is restarted and suddenly all transient traffic is trapped to the CPU because of a default route through the management interface. Locally routed packets required to re-establish a BGP connection will never reach the CPU and the routing tables will not be re-populated. Fix this by using a different trap group for externally routed packets. Fixes: 8110668ecd9a ("mlxsw: spectrum_trap: Register layer 3 control traps") Reported-by: Alex Veber Tested-by: Alex Veber Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-trap.rst | 4 ++++ drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 14 +++++++++++--- include/net/devlink.h | 3 +++ net/core/devlink.c | 1 + 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index 1e3f3ffee248..2014307fbe63 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -486,6 +486,10 @@ narrow. The description of these groups must be added to the following table: - Contains packet traps for packets that should be locally delivered after routing, but do not match more specific packet traps (e.g., ``ipv4_bgp``) + * - ``external_delivery`` + - Contains packet traps for packets that should be routed through an + external interface (e.g., management interface) that does not belong to + the same device (e.g., switch ASIC) as the ingress interface * - ``ipv6`` - Contains packet traps for various IPv6 control packets (e.g., Router Advertisements) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index fcb88d4271bf..8ac987c8c8bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5536,6 +5536,7 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_EXTERNAL_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 157a42c63066..1e38dfe7cf64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -328,6 +328,9 @@ mlxsw_sp_trap_policer_items_arr[] = { { .policer = MLXSW_SP_TRAP_POLICER(18, 1024, 128), }, + { + .policer = MLXSW_SP_TRAP_POLICER(19, 1024, 512), + }, }; static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = { @@ -421,6 +424,11 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = { .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, .priority = 2, }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(EXTERNAL_DELIVERY, 19), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_EXTERNAL_ROUTE, + .priority = 1, + }, { .group = DEVLINK_TRAP_GROUP_GENERIC(IPV6, 15), .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6, @@ -882,11 +890,11 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { }, }, { - .trap = MLXSW_SP_TRAP_CONTROL(EXTERNAL_ROUTE, LOCAL_DELIVERY, + .trap = MLXSW_SP_TRAP_CONTROL(EXTERNAL_ROUTE, EXTERNAL_DELIVERY, TRAP), .listeners_arr = { - MLXSW_SP_RXL_MARK(RTR_INGRESS0, IP2ME, TRAP_TO_CPU, - false), + MLXSW_SP_RXL_MARK(RTR_INGRESS0, EXTERNAL_ROUTE, + TRAP_TO_CPU, false), }, }, { diff --git a/include/net/devlink.h b/include/net/devlink.h index 1df6dfec26c2..95b0322a2a82 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -718,6 +718,7 @@ enum devlink_trap_group_generic_id { DEVLINK_TRAP_GROUP_GENERIC_ID_PIM, DEVLINK_TRAP_GROUP_GENERIC_ID_UC_LB, DEVLINK_TRAP_GROUP_GENERIC_ID_LOCAL_DELIVERY, + DEVLINK_TRAP_GROUP_GENERIC_ID_EXTERNAL_DELIVERY, DEVLINK_TRAP_GROUP_GENERIC_ID_IPV6, DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_EVENT, DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL, @@ -915,6 +916,8 @@ enum devlink_trap_group_generic_id { "uc_loopback" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_LOCAL_DELIVERY \ "local_delivery" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EXTERNAL_DELIVERY \ + "external_delivery" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_IPV6 \ "ipv6" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_EVENT \ diff --git a/net/core/devlink.c b/net/core/devlink.c index 2cafbc808b09..dc2b18475956 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8567,6 +8567,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(PIM), DEVLINK_TRAP_GROUP(UC_LB), DEVLINK_TRAP_GROUP(LOCAL_DELIVERY), + DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY), DEVLINK_TRAP_GROUP(IPV6), DEVLINK_TRAP_GROUP(PTP_EVENT), DEVLINK_TRAP_GROUP(PTP_GENERAL), -- cgit From 7d8e8f3433dc8d1dc87c1aabe73a154978fb4c4d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 12:26:45 +0300 Subject: mlxsw: core: Increase scope of RCU read-side critical section The lifetime of the Rx listener item ('rxl_item') is managed using RCU, but is dereferenced outside of RCU read-side critical section, which can lead to a use-after-free. Fix this by increasing the scope of the RCU read-side critical section. Fixes: 93c1edb27f9e ("mlxsw: Introduce Mellanox switch driver core") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d6d6fe64887b..5e76a96a118e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -2051,11 +2051,13 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, break; } } - rcu_read_unlock(); - if (!found) + if (!found) { + rcu_read_unlock(); goto drop; + } rxl->func(skb, local_port, rxl_item->priv); + rcu_read_unlock(); return; drop: -- cgit From 3c8ce24b037648a5a15b85888b259a74b05ff97d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 12:26:46 +0300 Subject: mlxsw: core: Free EMAD transactions using kfree_rcu() The lifetime of EMAD transactions (i.e., 'struct mlxsw_reg_trans') is managed using RCU. They are freed using kfree_rcu() once the transaction ends. However, in case the transaction failed it is freed immediately after being removed from the active transactions list. This is problematic because it is still possible for a different CPU to dereference the transaction from an RCU read-side critical section while traversing the active transaction list in mlxsw_emad_rx_listener_func(). In which case, a use-after-free is triggered [1]. Fix this by freeing the transaction after a grace period by calling kfree_rcu(). [1] BUG: KASAN: use-after-free in mlxsw_emad_rx_listener_func+0x969/0xac0 drivers/net/ethernet/mellanox/mlxsw/core.c:671 Read of size 8 at addr ffff88800b7964e8 by task syz-executor.2/2881 CPU: 0 PID: 2881 Comm: syz-executor.2 Not tainted 5.8.0-rc4+ #44 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xf6/0x16e lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 mlxsw_emad_rx_listener_func+0x969/0xac0 drivers/net/ethernet/mellanox/mlxsw/core.c:671 mlxsw_core_skb_receive+0x571/0x700 drivers/net/ethernet/mellanox/mlxsw/core.c:2061 mlxsw_pci_cqe_rdq_handle drivers/net/ethernet/mellanox/mlxsw/pci.c:595 [inline] mlxsw_pci_cq_tasklet+0x12a6/0x2520 drivers/net/ethernet/mellanox/mlxsw/pci.c:651 tasklet_action_common.isra.0+0x13f/0x3e0 kernel/softirq.c:550 __do_softirq+0x223/0x964 kernel/softirq.c:292 asm_call_on_stack+0x12/0x20 arch/x86/entry/entry_64.S:711 __run_on_irqstack arch/x86/include/asm/irq_stack.h:22 [inline] run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:48 [inline] do_softirq_own_stack+0x109/0x140 arch/x86/kernel/irq_64.c:77 invoke_softirq kernel/softirq.c:387 [inline] __irq_exit_rcu kernel/softirq.c:417 [inline] irq_exit_rcu+0x16f/0x1a0 kernel/softirq.c:429 sysvec_apic_timer_interrupt+0x4e/0xd0 arch/x86/kernel/apic/apic.c:1091 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:587 RIP: 0010:arch_local_irq_restore arch/x86/include/asm/irqflags.h:85 [inline] RIP: 0010:__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] RIP: 0010:_raw_spin_unlock_irqrestore+0x3b/0x40 kernel/locking/spinlock.c:191 Code: e8 2a c3 f4 fc 48 89 ef e8 12 96 f5 fc f6 c7 02 75 11 53 9d e8 d6 db 11 fd 65 ff 0d 1f 21 b3 56 5b 5d c3 e8 a7 d7 11 fd 53 9d ed 0f 1f 00 55 48 89 fd 65 ff 05 05 21 b3 56 ff 74 24 08 48 8d RSP: 0018:ffff8880446ffd80 EFLAGS: 00000286 RAX: 0000000000000006 RBX: 0000000000000286 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffa94ecea9 RBP: ffff888012934408 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: fffffbfff57be301 R12: 1ffff110088dffc1 R13: ffff888037b817c0 R14: ffff88802442415a R15: ffff888024424000 __do_sys_perf_event_open+0x1b5d/0x2bd0 kernel/events/core.c:11874 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x473dbd Code: Bad RIP value. RSP: 002b:00007f21e5e9cc28 EFLAGS: 00000246 ORIG_RAX: 000000000000012a RAX: ffffffffffffffda RBX: 000000000057bf00 RCX: 0000000000473dbd RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000040 RBP: 000000000057bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000246 R12: 000000000057bf0c R13: 00007ffd0493503f R14: 00000000004d0f46 R15: 00007f21e5e9cd80 Allocated by task 871: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:467 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] mlxsw_core_reg_access_emad+0x70/0x1410 drivers/net/ethernet/mellanox/mlxsw/core.c:1812 mlxsw_core_reg_access+0xeb/0x540 drivers/net/ethernet/mellanox/mlxsw/core.c:1991 mlxsw_sp_port_get_hw_xstats+0x335/0x7e0 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1130 update_stats_cache+0xf4/0x140 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1173 process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269 worker_thread+0x9e/0x1050 kernel/workqueue.c:2415 kthread+0x355/0x470 kernel/kthread.c:291 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293 Freed by task 871: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x12c/0x170 mm/kasan/common.c:455 slab_free_hook mm/slub.c:1474 [inline] slab_free_freelist_hook mm/slub.c:1507 [inline] slab_free mm/slub.c:3072 [inline] kfree+0xe6/0x320 mm/slub.c:4052 mlxsw_core_reg_access_emad+0xd45/0x1410 drivers/net/ethernet/mellanox/mlxsw/core.c:1819 mlxsw_core_reg_access+0xeb/0x540 drivers/net/ethernet/mellanox/mlxsw/core.c:1991 mlxsw_sp_port_get_hw_xstats+0x335/0x7e0 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1130 update_stats_cache+0xf4/0x140 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:1173 process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269 worker_thread+0x9e/0x1050 kernel/workqueue.c:2415 kthread+0x355/0x470 kernel/kthread.c:291 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293 The buggy address belongs to the object at ffff88800b796400 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 232 bytes inside of 512-byte region [ffff88800b796400, ffff88800b796600) The buggy address belongs to the page: page:ffffea00002de500 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 head:ffffea00002de500 order:2 compound_mapcount:0 compound_pincount:0 flags: 0x100000000010200(slab|head) raw: 0100000000010200 dead000000000100 dead000000000122 ffff88806c402500 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88800b796380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88800b796400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88800b796480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88800b796500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88800b796580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: caf7297e7ab5 ("mlxsw: core: Introduce support for asynchronous EMAD register access") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 5e76a96a118e..71b6185b4904 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1814,7 +1814,7 @@ static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans, bulk_list, cb, cb_priv, tid); if (err) { - kfree(trans); + kfree_rcu(trans, rcu); return err; } return 0; -- cgit From 5515c3448d55bdcb5ff8a1778aa84f34e4205596 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 29 Jul 2020 12:26:47 +0300 Subject: mlxsw: spectrum_router: Fix use-after-free in router init / de-init Several notifiers are registered as part of router initialization. Since some of these notifiers are registered before the end of the initialization, it is possible for them to access uninitialized or freed memory when processing notifications [1]. Additionally, some of these notifiers queue work items on a workqueue. If these work items are executed after the router was de-initialized, they will access freed memory. Fix both problems by moving the registration of the notifiers to the end of the router initialization and flush the work queue after they are unregistered. [1] BUG: KASAN: use-after-free in __mutex_lock_common kernel/locking/mutex.c:938 [inline] BUG: KASAN: use-after-free in __mutex_lock+0xeea/0x1340 kernel/locking/mutex.c:1103 Read of size 8 at addr ffff888038c3a6e0 by task kworker/u4:1/61 CPU: 1 PID: 61 Comm: kworker/u4:1 Not tainted 5.8.0-rc2+ #36 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: mlxsw_core_ordered mlxsw_sp_inet6addr_event_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xf6/0x16e lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 __mutex_lock_common kernel/locking/mutex.c:938 [inline] __mutex_lock+0xeea/0x1340 kernel/locking/mutex.c:1103 mlxsw_sp_inet6addr_event_work+0xb3/0x1b0 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:7123 process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269 worker_thread+0x9e/0x1050 kernel/workqueue.c:2415 kthread+0x355/0x470 kernel/kthread.c:291 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293 Allocated by task 1298: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:467 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] mlxsw_sp_router_init+0xb2/0x1d20 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:8074 mlxsw_sp_init+0xbd8/0x3ac0 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:2932 __mlxsw_core_bus_device_register+0x657/0x10d0 drivers/net/ethernet/mellanox/mlxsw/core.c:1375 mlxsw_core_bus_device_register drivers/net/ethernet/mellanox/mlxsw/core.c:1436 [inline] mlxsw_devlink_core_bus_device_reload_up+0xcd/0x150 drivers/net/ethernet/mellanox/mlxsw/core.c:1133 devlink_reload net/core/devlink.c:2959 [inline] devlink_reload+0x281/0x3b0 net/core/devlink.c:2944 devlink_nl_cmd_reload+0x2f1/0x7c0 net/core/devlink.c:2987 genl_family_rcv_msg_doit net/netlink/genetlink.c:691 [inline] genl_family_rcv_msg net/netlink/genetlink.c:736 [inline] genl_rcv_msg+0x611/0x9d0 net/netlink/genetlink.c:753 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2469 genl_rcv+0x24/0x40 net/netlink/genetlink.c:764 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0x150/0x190 net/socket.c:672 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2363 ___sys_sendmsg+0xff/0x170 net/socket.c:2417 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2450 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 1348: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x12c/0x170 mm/kasan/common.c:455 slab_free_hook mm/slub.c:1474 [inline] slab_free_freelist_hook mm/slub.c:1507 [inline] slab_free mm/slub.c:3072 [inline] kfree+0xe6/0x320 mm/slub.c:4063 mlxsw_sp_fini+0x340/0x4e0 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:3132 mlxsw_core_bus_device_unregister+0x16c/0x6d0 drivers/net/ethernet/mellanox/mlxsw/core.c:1474 mlxsw_devlink_core_bus_device_reload_down+0x8e/0xc0 drivers/net/ethernet/mellanox/mlxsw/core.c:1123 devlink_reload+0xc6/0x3b0 net/core/devlink.c:2952 devlink_nl_cmd_reload+0x2f1/0x7c0 net/core/devlink.c:2987 genl_family_rcv_msg_doit net/netlink/genetlink.c:691 [inline] genl_family_rcv_msg net/netlink/genetlink.c:736 [inline] genl_rcv_msg+0x611/0x9d0 net/netlink/genetlink.c:753 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2469 genl_rcv+0x24/0x40 net/netlink/genetlink.c:764 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0x150/0x190 net/socket.c:672 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2363 ___sys_sendmsg+0xff/0x170 net/socket.c:2417 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2450 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff888038c3a000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 1760 bytes inside of 2048-byte region [ffff888038c3a000, ffff888038c3a800) The buggy address belongs to the page: page:ffffea0000e30e00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 head:ffffea0000e30e00 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x100000000010200(slab|head) raw: 0100000000010200 dead000000000100 dead000000000122 ffff88806c40c000 raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888038c3a580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888038c3a600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888038c3a680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888038c3a700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888038c3a780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 965fa8e600d2 ("mlxsw: spectrum_router: Make RIF deletion more robust") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 50 +++++++++++----------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index bd4803074776..0521e9d48c45 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8069,16 +8069,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; - router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; - err = register_inetaddr_notifier(&router->inetaddr_nb); - if (err) - goto err_register_inetaddr_notifier; - - router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event; - err = register_inet6addr_notifier(&router->inet6addr_nb); - if (err) - goto err_register_inet6addr_notifier; - INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -8119,12 +8109,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_neigh_init; - mlxsw_sp->router->netevent_nb.notifier_call = - mlxsw_sp_router_netevent_event; - err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); - if (err) - goto err_register_netevent_notifier; - err = mlxsw_sp_mp_hash_init(mlxsw_sp); if (err) goto err_mp_hash_init; @@ -8133,6 +8117,22 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_dscp_init; + router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; + err = register_inetaddr_notifier(&router->inetaddr_nb); + if (err) + goto err_register_inetaddr_notifier; + + router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event; + err = register_inet6addr_notifier(&router->inet6addr_nb); + if (err) + goto err_register_inet6addr_notifier; + + mlxsw_sp->router->netevent_nb.notifier_call = + mlxsw_sp_router_netevent_event; + err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); + if (err) + goto err_register_netevent_notifier; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->fib_nb, @@ -8143,10 +8143,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, return 0; err_register_fib_notifier: -err_dscp_init: -err_mp_hash_init: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: + unregister_inet6addr_notifier(&router->inet6addr_nb); +err_register_inet6addr_notifier: + unregister_inetaddr_notifier(&router->inetaddr_nb); +err_register_inetaddr_notifier: + mlxsw_core_flush_owq(); +err_dscp_init: +err_mp_hash_init: mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); @@ -8165,10 +8170,6 @@ err_ipips_init: err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: - unregister_inet6addr_notifier(&router->inet6addr_nb); -err_register_inet6addr_notifier: - unregister_inetaddr_notifier(&router->inetaddr_nb); -err_register_inetaddr_notifier: mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; @@ -8179,6 +8180,9 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->fib_nb); unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); + unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); + mlxsw_core_flush_owq(); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); @@ -8188,8 +8192,6 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); - unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); - unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); } -- cgit From 10fef9ca6a879e7bee090b8e51c9812d438d3fb1 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Wed, 29 Jul 2020 12:26:48 +0300 Subject: selftests: ethtool: Fix test when only two speeds are supported The test case check_highest_speed_is_chosen() configures $h1 to advertise a subset of its supported speeds and checks that $h2 chooses the highest speed from the subset. To find the common advertised speeds between $h1 and $h2, common_speeds_get() is called. Currently, the first speed returned from common_speeds_get() is removed claiming "h1 does not advertise this speed". The claim is wrong because the function is called after $h1 already advertised a subset of speeds. In case $h1 supports only two speeds, it will advertise a single speed which will be later removed because of previously mentioned bug. This results in the test needlessly failing. When more than two speeds are supported this is not an issue because the first advertised speed is the lowest one. Fix this by not removing any speed from the list of commonly advertised speeds. Fixes: 64916b57c0b1 ("selftests: forwarding: Add speed and auto-negotiation test") Reported-by: Danielle Ratson Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/ethtool.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh index eb8e2a23bbb4..43a948feed26 100755 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ b/tools/testing/selftests/net/forwarding/ethtool.sh @@ -252,8 +252,6 @@ check_highest_speed_is_chosen() fi local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) - # Remove the first speed, h1 does not advertise this speed. - unset speeds_arr[0] max_speed=${speeds_arr[0]} for current in ${speeds_arr[@]}; do -- cgit From 27a2145d6f826d1fad9de06ac541b1016ced3427 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 29 Jul 2020 16:36:32 -0500 Subject: ibmvnic: Fix IRQ mapping disposal in error path RX queue IRQ mappings are disposed in both the TX IRQ and RX IRQ error paths. Fix this and dispose of TX IRQ mappings correctly in case of an error. Fixes: ea22d51a7831 ("ibmvnic: simplify and improve driver probe function") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0fd7eae25fe9..5afb3c9c52d2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3206,7 +3206,7 @@ req_rx_irq_failed: req_tx_irq_failed: for (j = 0; j < i; j++) { free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); - irq_dispose_mapping(adapter->rx_scrq[j]->irq); + irq_dispose_mapping(adapter->tx_scrq[j]->irq); } release_sub_crqs(adapter, 1); return rc; -- cgit From 1d4e1eab456e1ee92a94987499b211db05f900ea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Jul 2020 21:09:12 -0700 Subject: bpf: Fix map leak in HASH_OF_MAPS map Fix HASH_OF_MAPS bug of not putting inner map pointer on bpf_map_elem_update() operation. This is due to per-cpu extra_elems optimization, which bypassed free_htab_elem() logic doing proper clean ups. Make sure that inner map is put properly in optimized case as well. Fixes: 8c290e60fa2a ("bpf: fix hashmap extra_elems logic") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200729040913.2815687-1-andriin@fb.com --- kernel/bpf/hashtab.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b4b288a3c3c9..b32cc8ce8ff6 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -779,15 +779,20 @@ static void htab_elem_free_rcu(struct rcu_head *head) htab_elem_free(htab, l); } -static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) +static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) { struct bpf_map *map = &htab->map; + void *ptr; if (map->ops->map_fd_put_ptr) { - void *ptr = fd_htab_map_get_ptr(map, l); - + ptr = fd_htab_map_get_ptr(map, l); map->ops->map_fd_put_ptr(ptr); } +} + +static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) +{ + htab_put_fd_value(htab, l); if (htab_is_prealloc(htab)) { __pcpu_freelist_push(&htab->freelist, &l->fnode); @@ -839,6 +844,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, */ pl_new = this_cpu_ptr(htab->extra_elems); l_new = *pl_new; + htab_put_fd_value(htab, old_elem); *pl_new = old_elem; } else { struct pcpu_freelist_node *l; -- cgit From 0ba58348414eb10249480635545758b40b3c33b6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Jul 2020 21:09:13 -0700 Subject: selftests/bpf: Extend map-in-map selftest to detect memory leaks Add test validating that all inner maps are released properly after skeleton is destroyed. To ensure determinism, trigger kernel-side synchronize_rcu() before checking map existence by their IDs. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200729040913.2815687-2-andriin@fb.com --- .../selftests/bpf/prog_tests/btf_map_in_map.c | 124 ++++++++++++++++++--- 1 file changed, 110 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index f7ee8fa377ad..6ccecbd39476 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -5,10 +5,60 @@ #include "test_btf_map_in_map.skel.h" +static int duration; + +static __u32 bpf_map_id(struct bpf_map *map) +{ + struct bpf_map_info info; + __u32 info_len = sizeof(info); + int err; + + memset(&info, 0, info_len); + err = bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, &info_len); + if (err) + return 0; + return info.id; +} + +/* + * Trigger synchronize_rcu() in kernel. + * + * ARRAY_OF_MAPS/HASH_OF_MAPS lookup/update operations trigger synchronize_rcu() + * if looking up an existing non-NULL element or updating the map with a valid + * inner map FD. Use this fact to trigger synchronize_rcu(): create map-in-map, + * create a trivial ARRAY map, update map-in-map with ARRAY inner map. Then + * cleanup. At the end, at least one synchronize_rcu() would be called. + */ +static int kern_sync_rcu(void) +{ + int inner_map_fd, outer_map_fd, err, zero = 0; + + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); + if (CHECK(inner_map_fd < 0, "inner_map_create", "failed %d\n", -errno)) + return -1; + + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, + sizeof(int), inner_map_fd, 1, 0); + if (CHECK(outer_map_fd < 0, "outer_map_create", "failed %d\n", -errno)) { + close(inner_map_fd); + return -1; + } + + err = bpf_map_update_elem(outer_map_fd, &zero, &inner_map_fd, 0); + if (err) + err = -errno; + CHECK(err, "outer_map_update", "failed %d\n", err); + close(inner_map_fd); + close(outer_map_fd); + return err; +} + void test_btf_map_in_map(void) { - int duration = 0, err, key = 0, val; - struct test_btf_map_in_map* skel; + int err, key = 0, val, i; + struct test_btf_map_in_map *skel; + int outer_arr_fd, outer_hash_fd; + int fd, map1_fd, map2_fd, map1_id, map2_id; skel = test_btf_map_in_map__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) @@ -18,32 +68,78 @@ void test_btf_map_in_map(void) if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; + map1_fd = bpf_map__fd(skel->maps.inner_map1); + map2_fd = bpf_map__fd(skel->maps.inner_map2); + outer_arr_fd = bpf_map__fd(skel->maps.outer_arr); + outer_hash_fd = bpf_map__fd(skel->maps.outer_hash); + /* inner1 = input, inner2 = input + 1 */ - val = bpf_map__fd(skel->maps.inner_map1); - bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0); - val = bpf_map__fd(skel->maps.inner_map2); - bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0); + map1_fd = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0); + map2_fd = bpf_map__fd(skel->maps.inner_map2); + bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0); skel->bss->input = 1; usleep(1); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val); + bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val); + bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); /* inner1 = input + 1, inner2 = input */ - val = bpf_map__fd(skel->maps.inner_map2); - bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0); - val = bpf_map__fd(skel->maps.inner_map1); - bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0); + bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0); + bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0); skel->bss->input = 3; usleep(1); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val); + bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val); + bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + for (i = 0; i < 5; i++) { + val = i % 2 ? map1_fd : map2_fd; + err = bpf_map_update_elem(outer_hash_fd, &key, &val, 0); + if (CHECK_FAIL(err)) { + printf("failed to update hash_of_maps on iter #%d\n", i); + goto cleanup; + } + err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0); + if (CHECK_FAIL(err)) { + printf("failed to update hash_of_maps on iter #%d\n", i); + goto cleanup; + } + } + + map1_id = bpf_map_id(skel->maps.inner_map1); + map2_id = bpf_map_id(skel->maps.inner_map2); + CHECK(map1_id == 0, "map1_id", "failed to get ID 1\n"); + CHECK(map2_id == 0, "map2_id", "failed to get ID 2\n"); + + test_btf_map_in_map__destroy(skel); + skel = NULL; + + /* we need to either wait for or force synchronize_rcu(), before + * checking for "still exists" condition, otherwise map could still be + * resolvable by ID, causing false positives. + * + * Older kernels (5.8 and earlier) freed map only after two + * synchronize_rcu()s, so trigger two, to be entirely sure. + */ + CHECK(kern_sync_rcu(), "sync_rcu", "failed\n"); + CHECK(kern_sync_rcu(), "sync_rcu", "failed\n"); + + fd = bpf_map_get_fd_by_id(map1_id); + if (CHECK(fd >= 0, "map1_leak", "inner_map1 leaked!\n")) { + close(fd); + goto cleanup; + } + fd = bpf_map_get_fd_by_id(map2_id); + if (CHECK(fd >= 0, "map2_leak", "inner_map2 leaked!\n")) { + close(fd); + goto cleanup; + } + cleanup: test_btf_map_in_map__destroy(skel); } -- cgit From fadd1a63a7b4df295a01fa50b2f4e447542bee59 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 29 Jul 2020 18:38:42 +0200 Subject: espintcp: handle short messages instead of breaking the encap socket Currently, short messages (less than 4 bytes after the length header) will break the stream of messages. This is unnecessary, since we can still parse messages even if they're too short to contain any usable data. This is also bogus, as keepalive messages (a single 0xff byte), though not needed with TCP encapsulation, should be allowed. This patch changes the stream parser so that short messages are accepted and dropped in the kernel. Messages that contain a valid SPI or non-ESP header are processed as before. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Reported-by: Andrew Cagney Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/espintcp.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index cb83e3664680..0a91b07f2b43 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -49,9 +49,32 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx, strp); struct strp_msg *rxm = strp_msg(skb); + int len = rxm->full_len - 2; u32 nonesp_marker; int err; + /* keepalive packet? */ + if (unlikely(len == 1)) { + u8 data; + + err = skb_copy_bits(skb, rxm->offset + 2, &data, 1); + if (err < 0) { + kfree_skb(skb); + return; + } + + if (data == 0xff) { + kfree_skb(skb); + return; + } + } + + /* drop other short messages */ + if (unlikely(len <= sizeof(nonesp_marker))) { + kfree_skb(skb); + return; + } + err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker, sizeof(nonesp_marker)); if (err < 0) { @@ -91,7 +114,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) return err; len = be16_to_cpu(blen); - if (len < 6) + if (len < 2) return -EINVAL; return len; -- cgit From 71b59bf482b2dd662774f34108c5b904efa9e02b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 29 Jul 2020 18:38:43 +0200 Subject: espintcp: count packets dropped in espintcp_rcv Currently, espintcp_rcv drops packets silently, which makes debugging issues difficult. Count packets as either XfrmInHdrError (when the packet was too short or contained invalid data) or XfrmInError (for other issues). Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/espintcp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 0a91b07f2b43..827ccdf2db57 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -15,6 +15,7 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, { if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, skb->truesize)) { + XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } @@ -59,6 +60,7 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) err = skb_copy_bits(skb, rxm->offset + 2, &data, 1); if (err < 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); kfree_skb(skb); return; } @@ -71,6 +73,7 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) /* drop other short messages */ if (unlikely(len <= sizeof(nonesp_marker))) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); kfree_skb(skb); return; } @@ -78,17 +81,20 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker, sizeof(nonesp_marker)); if (err < 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); kfree_skb(skb); return; } /* remove header, leave non-ESP marker/SPI */ if (!__pskb_pull(skb, rxm->offset + 2)) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } if (pskb_trim(skb, rxm->full_len - 2) != 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } -- cgit From 65ad3ef9fced4062dfd74e2f89443fb5ce184321 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 1 Jul 2020 19:20:24 -0700 Subject: mac80211: fix warning in 6 GHz IE addition in mesh mode The commit 24a2042cb22f ("mac80211: add HE 6 GHz Band Capability element") failed to check device capability before adding HE 6 GHz capability element. Below warning is reported in 11ac device in mesh. Fix that by checking device capability at HE 6 GHz cap IE addition in mesh beacon and association request. WARNING: CPU: 1 PID: 1897 at net/mac80211/util.c:2878 ieee80211_ie_build_he_6ghz_cap+0x149/0x150 [mac80211] [ 3138.720358] Call Trace: [ 3138.720361] ieee80211_mesh_build_beacon+0x462/0x530 [mac80211] [ 3138.720363] ieee80211_start_mesh+0xa8/0xf0 [mac80211] [ 3138.720365] __cfg80211_join_mesh+0x122/0x3e0 [cfg80211] [ 3138.720368] nl80211_join_mesh+0x3d3/0x510 [cfg80211] Fixes: 24a2042cb22f ("mac80211: add HE 6 GHz Band Capability element") Reported-by: Markus Theil Signed-off-by: Rajkumar Manoharan Link: https://lore.kernel.org/r/1593656424-18240-1-git-send-email-rmanohar@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 13 +++++++++++++ net/mac80211/util.c | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5f1ca25b6c97..e88beb3ff6db 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -617,6 +617,19 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { + struct ieee80211_supported_band *sband; + const struct ieee80211_sband_iftype_data *iftd; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + iftd = ieee80211_get_sband_iftype_data(sband, + NL80211_IFTYPE_MESH_POINT); + /* The device doesn't support HE in mesh mode or at all */ + if (!iftd) + return 0; + ieee80211_ie_build_he_6ghz_cap(sdata, skb); return 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 21c94094a699..dd9f5c7a1ade 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2878,6 +2878,10 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!iftd)) return; + /* Check for device HE 6 GHz capability before adding element */ + if (!iftd->he_6ghz_capa.capa) + return; + cap = le16_to_cpu(iftd->he_6ghz_capa.capa); cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS; -- cgit From 6a01afcf8468d3ca2bd8bbb27503f60dcf643b20 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Sat, 4 Jul 2020 15:50:07 +0200 Subject: mac80211: mesh: Free ie data when leaving mesh At ieee80211_join_mesh() some ie data could have been allocated (see copy_mesh_setup()) and need to be cleaned up when leaving the mesh. This fixes the following kmemleak report: unreferenced object 0xffff0000116bc600 (size 128): comm "wpa_supplicant", pid 608, jiffies 4294898983 (age 293.484s) hex dump (first 32 bytes): 30 14 01 00 00 0f ac 04 01 00 00 0f ac 04 01 00 0............... 00 0f ac 08 00 00 00 00 c4 65 40 00 00 00 00 00 .........e@..... backtrace: [<00000000bebe439d>] __kmalloc_track_caller+0x1c0/0x330 [<00000000a349dbe1>] kmemdup+0x28/0x50 [<0000000075d69baa>] ieee80211_join_mesh+0x6c/0x3b8 [mac80211] [<00000000683bb98b>] __cfg80211_join_mesh+0x1e8/0x4f0 [cfg80211] [<0000000072cb507f>] nl80211_join_mesh+0x520/0x6b8 [cfg80211] [<0000000077e9bcf9>] genl_family_rcv_msg+0x374/0x680 [<00000000b1bd936d>] genl_rcv_msg+0x78/0x108 [<0000000022c53788>] netlink_rcv_skb+0xb0/0x1c0 [<0000000011af8ec9>] genl_rcv+0x34/0x48 [<0000000069e41f53>] netlink_unicast+0x268/0x2e8 [<00000000a7517316>] netlink_sendmsg+0x320/0x4c0 [<0000000069cba205>] ____sys_sendmsg+0x354/0x3a0 [<00000000e06bab0f>] ___sys_sendmsg+0xd8/0x120 [<0000000037340728>] __sys_sendmsg+0xa4/0xf8 [<000000004fed9776>] __arm64_sys_sendmsg+0x44/0x58 [<000000001c1e5647>] el0_svc_handler+0xd0/0x1a0 Fixes: c80d545da3f7 (mac80211: Let userspace enable and configure vendor specific path selection.) Signed-off-by: Remi Pommarel Link: https://lore.kernel.org/r/20200704135007.27292-1-repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9b360544ad6f..1079a07e43e4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2166,6 +2166,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) ieee80211_stop_mesh(sdata); mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + kfree(sdata->u.mesh.ie); mutex_unlock(&sdata->local->mtx); return 0; -- cgit From 5e43540c2af0a0c0a18e39579b1ad49541f87506 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Sat, 4 Jul 2020 15:54:19 +0200 Subject: mac80211: mesh: Free pending skb when destroying a mpath A mpath object can hold reference on a list of skb that are waiting for mpath resolution to be sent. When destroying a mpath this skb list should be cleaned up in order to not leak memory. Fixing that kind of leak: unreferenced object 0xffff0000181c9300 (size 1088): comm "openvpn", pid 1782, jiffies 4295071698 (age 80.416s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 f9 80 36 00 00 00 00 00 ..........6..... 02 00 07 40 00 00 00 00 00 00 00 00 00 00 00 00 ...@............ backtrace: [<000000004bc6a443>] kmem_cache_alloc+0x1a4/0x2f0 [<000000002caaef13>] sk_prot_alloc.isra.39+0x34/0x178 [<00000000ceeaa916>] sk_alloc+0x34/0x228 [<00000000ca1f1d04>] inet_create+0x198/0x518 [<0000000035626b1c>] __sock_create+0x134/0x328 [<00000000a12b3a87>] __sys_socket+0xb0/0x158 [<00000000ff859f23>] __arm64_sys_socket+0x40/0x58 [<00000000263486ec>] el0_svc_handler+0xd0/0x1a0 [<0000000005b5157d>] el0_svc+0x8/0xc unreferenced object 0xffff000012973a40 (size 216): comm "openvpn", pid 1782, jiffies 4295082137 (age 38.660s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 c0 06 16 00 00 ff ff 00 93 1c 18 00 00 ff ff ................ backtrace: [<000000004bc6a443>] kmem_cache_alloc+0x1a4/0x2f0 [<0000000023c8c8f9>] __alloc_skb+0xc0/0x2b8 [<000000007ad950bb>] alloc_skb_with_frags+0x60/0x320 [<00000000ef90023a>] sock_alloc_send_pskb+0x388/0x3c0 [<00000000104fb1a3>] sock_alloc_send_skb+0x1c/0x28 [<000000006919d2dd>] __ip_append_data+0xba4/0x11f0 [<0000000083477587>] ip_make_skb+0x14c/0x1a8 [<0000000024f3d592>] udp_sendmsg+0xaf0/0xcf0 [<000000005aabe255>] inet_sendmsg+0x5c/0x80 [<000000008651ea08>] __sys_sendto+0x15c/0x218 [<000000003505c99b>] __arm64_sys_sendto+0x74/0x90 [<00000000263486ec>] el0_svc_handler+0xd0/0x1a0 [<0000000005b5157d>] el0_svc+0x8/0xc Fixes: 2bdaf386f99c (mac80211: mesh: move path tables into if_mesh) Signed-off-by: Remi Pommarel Link: https://lore.kernel.org/r/20200704135419.27703-1-repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 117519bf33d6..aca608ae313f 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -521,6 +521,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, del_timer_sync(&mpath->timer); atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); + mesh_path_flush_pending(mpath); kfree_rcu(mpath, rcu); } -- cgit From 4052d3d2e8f47a15053320bbcbe365d15610437d Mon Sep 17 00:00:00 2001 From: Julian Squires Date: Mon, 6 Jul 2020 17:13:53 -0400 Subject: cfg80211: check vendor command doit pointer before use In the case where a vendor command does not implement doit, and has no flags set, doit would not be validated and a NULL pointer dereference would occur, for example when invoking the vendor command via iw. I encountered this while developing new vendor commands. Perhaps in practice it is advisable to always implement doit along with dumpit, but it seems reasonable to me to always check doit anyway, not just when NEED_WDEV. Signed-off-by: Julian Squires Link: https://lore.kernel.org/r/20200706211353.2366470-1-julian@cipht.net Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0e07fb8585fb..7fbca0854265 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13266,13 +13266,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) if (!wdev_running(wdev)) return -ENETDOWN; } - - if (!vcmd->doit) - return -EOPNOTSUPP; } else { wdev = NULL; } + if (!vcmd->doit) + return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); -- cgit From e61fbfca8055eef18f11727f431e1e3e5221291e Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 22 Jul 2020 19:50:17 +0530 Subject: mac80211: Fix bug in Tx ack status reporting in 802.3 xmit path Allocated ack_frame id from local->ack_status_frames is not really stored in the tx_info for 802.3 Tx path. Due to this, tx ack status is not reported and ack_frame id is not freed for the buffers requiring tx ack status. Also move the memset to 0 of tx_info before IEEE80211_TX_CTL_REQ_TX_STATUS flag assignment. Fixes: 50ff477a8639 ("mac80211: add 802.11 encapsulation offloading support") Signed-off-by: Vasanthakumar Thiagarajan Link: https://lore.kernel.org/r/1595427617-1713-1-git-send-email-vthiagar@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1a2941e5244f..3529d1368068 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4230,11 +4230,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) goto out_free; + memset(info, 0, sizeof(*info)); + if (unlikely(!multicast && skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - ieee80211_store_ack_skb(local, skb, &info->flags, NULL); - - memset(info, 0, sizeof(*info)); + info->ack_frame_id = ieee80211_store_ack_skb(local, skb, + &info->flags, NULL); if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) { if (sdata->control_port_no_encrypt) -- cgit From 04e35caa32ec9aae6b306d07f07dc2ee6d69166c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 25 Jul 2020 10:45:33 +0200 Subject: mac80211: remove STA txq pending airtime underflow warning This warning can trigger if there is a mismatch between frames that were sent with the sta pointer set vs tx status frames reported for the sta address. This can happen due to race conditions on re-creating stations, or even in the case of .sta_add/remove being used instead of .sta_state, which can cause frames to be sent to a station that has not been uploaded yet. If there is an actual underflow issue, it should show up in the device airtime warning below, so it is better to remove this one. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200725084533.13829-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cd8487bc6fc2..af4cc5fb678e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1923,9 +1923,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, if (sta) { tx_pending = atomic_sub_return(tx_airtime, &sta->airtime[ac].aql_tx_pending); - if (WARN_ONCE(tx_pending < 0, - "STA %pM AC %d txq pending airtime underflow: %u, %u", - sta->addr, ac, tx_pending, tx_airtime)) + if (tx_pending < 0) atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, tx_pending, 0); } -- cgit From 1050242fa657093d3788c3ffa0bd9b42e4d7d1ab Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 18 Jun 2020 14:54:53 +0800 Subject: e1000e: continue to init PHY even when failed to disable ULP After 'commit e086ba2fccda4 ("e1000e: disable s0ix entry and exit flows for ME systems")', ThinkPad P14s always failed to disable ULP by ME. 'commit 0c80cdbf3320 ("e1000e: Warn if disabling ULP failed")' break out of init phy: error log: [ 42.364753] e1000e 0000:00:1f.6 enp0s31f6: Failed to disable ULP [ 42.524626] e1000e 0000:00:1f.6 enp0s31f6: PHY Wakeup cause - Unicast Packet [ 42.822476] e1000e 0000:00:1f.6 enp0s31f6: Hardware Error When disable s0ix, E1000_FWSM_ULP_CFG_DONE will never be 1. If continue to init phy like before, it can work as before. iperf test result good too. Fixes: 0c80cdbf3320 ("e1000e: Warn if disabling ULP failed") Signed-off-by: Aaron Ma Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index f999cca37a8a..489bb5b59475 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -301,10 +301,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) */ hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_unknown; ret_val = e1000_disable_ulp_lpt_lp(hw, true); - if (ret_val) { + if (ret_val) e_warn("Failed to disable ULP\n"); - goto out; - } ret_val = hw->phy.ops.acquire(hw); if (ret_val) { -- cgit From 024a8168b749db7a4aa40a5fbdfa04bf7e77c1c0 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Thu, 2 Jul 2020 15:39:06 -0700 Subject: igb: reinit_locked() should be called with rtnl_lock We observed two panics involving races with igb_reset_task. The first panic is caused by this race condition: kworker reboot -f igb_reset_task igb_reinit_locked igb_down napi_synchronize __igb_shutdown igb_clear_interrupt_scheme igb_free_q_vectors igb_free_q_vector adapter->q_vector[v_idx] = NULL; napi_disable Panics trying to access adapter->q_vector[v_idx].napi_state The second panic (a divide error) is caused by this race: kworker reboot -f tx packet igb_reset_task __igb_shutdown rtnl_lock() ... igb_clear_interrupt_scheme igb_free_q_vectors adapter->num_tx_queues = 0 ... rtnl_unlock() rtnl_lock() igb_reinit_locked igb_down igb_up netif_tx_start_all_queues dev_hard_start_xmit igb_xmit_frame igb_tx_queue_mapping Panics on r_idx % adapter->num_tx_queues This commit applies to igb_reset_task the same changes that were applied to ixgbe in commit 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver"), commit 8f4c5c9fb87a ("ixgbe: reinit_locked() should be called with rtnl_lock") and commit 88adce4ea8f9 ("ixgbe: fix possible race in reset subtask"). Signed-off-by: Francesco Ruggeri Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8bb3db2cbd41..6e5861bfb0fa 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6224,9 +6224,18 @@ static void igb_reset_task(struct work_struct *work) struct igb_adapter *adapter; adapter = container_of(work, struct igb_adapter, reset_task); + rtnl_lock(); + /* If we're already down or resetting, just bail */ + if (test_bit(__IGB_DOWN, &adapter->state) || + test_bit(__IGB_RESETTING, &adapter->state)) { + rtnl_unlock(); + return; + } + igb_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); igb_reinit_locked(adapter); + rtnl_unlock(); } /** -- cgit From 4f010246b4087ab931b060481014ec110e6a8a46 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Jul 2020 18:09:00 +0200 Subject: net/bpfilter: Initialize pos in __bpfilter_process_sockopt __bpfilter_process_sockopt never initialized the pos variable passed to the pipe write. This has been mostly harmless in the past as pipes ignore the offset, but the switch to kernel_write now verified the position, which can lead to a failure depending on the exact stack initialization pattern. Initialize the variable to zero to make rw_verify_area happy. Fixes: 6955a76fbcd5 ("bpfilter: switch to kernel_write") Reported-by: Christian Brauner Reported-by: Rodrigo Madera Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Tested-by: Rodrigo Madera Tested-by: Christian Brauner Reviewed-by: Christian Brauner Link: https://lore.kernel.org/bpf/20200730160900.187157-1-hch@lst.de --- net/bpfilter/bpfilter_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index c0f0990f30b6..cfb27166bfd7 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -39,7 +39,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, { struct mbox_request req; struct mbox_reply reply; - loff_t pos; + loff_t pos = 0; ssize_t n; int ret = -EFAULT; -- cgit From 8c0de6e96c9794cb523a516c465991a70245da1c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 Jul 2020 15:40:53 -0700 Subject: ipv6: fix memory leaks on IPV6_ADDRFORM path IPV6_ADDRFORM causes resource leaks when converting an IPv6 socket to IPv4, particularly struct ipv6_ac_socklist. Similar to struct ipv6_mc_socklist, we should just close it on this path. This bug can be easily reproduced with the following C program: #include #include #include #include #include int main() { int s, value; struct sockaddr_in6 addr; struct ipv6_mreq m6; s = socket(AF_INET6, SOCK_DGRAM, 0); addr.sin6_family = AF_INET6; addr.sin6_port = htons(5000); inet_pton(AF_INET6, "::ffff:192.168.122.194", &addr.sin6_addr); connect(s, (struct sockaddr *)&addr, sizeof(addr)); inet_pton(AF_INET6, "fe80::AAAA", &m6.ipv6mr_multiaddr); m6.ipv6mr_interface = 5; setsockopt(s, SOL_IPV6, IPV6_JOIN_ANYCAST, &m6, sizeof(m6)); value = AF_INET; setsockopt(s, SOL_IPV6, IPV6_ADDRFORM, &value, sizeof(value)); close(s); return 0; } Reported-by: ch3332xr@gmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/anycast.c | 17 ++++++++++++----- net/ipv6/ipv6_sockglue.c | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index fdb07105384c..8418b7d38468 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -274,6 +274,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); +void __ipv6_sock_ac_close(struct sock *sk); void ipv6_sock_ac_close(struct sock *sk); int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 893261230ffc..dacdea7fcb62 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return 0; } -void ipv6_sock_ac_close(struct sock *sk) +void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; @@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk) struct net *net = sock_net(sk); int prev_index; - if (!np->ipv6_ac_list) - return; - - rtnl_lock(); + ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; @@ -211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } +} + +void ipv6_sock_ac_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!np->ipv6_ac_list) + return; + rtnl_lock(); + __ipv6_sock_ac_close(sk); rtnl_unlock(); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 20576e87a5f7..76f9e41859a2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -240,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); + __ipv6_sock_ac_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so -- cgit From 4bbca662df2523ff7ad3224463f1f28e6a118044 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 27 Jul 2020 19:04:55 +0800 Subject: selftests/bpf: fix netdevsim trap_flow_action_cookie read When read netdevsim trap_flow_action_cookie, we need to init it first, or we will get "Invalid argument" error. Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_offload.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 8294ae3ffb3c..43c9cda199b8 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -318,6 +318,9 @@ class DebugfsDir: continue if os.path.isfile(p): + # We need to init trap_flow_action_cookie before read it + if f == "trap_flow_action_cookie": + cmd('echo deadbeef > %s/%s' % (path, f)) _, out = cmd('cat %s/%s' % (path, f)) dfs[f] = out.strip() elif os.path.isdir(p): -- cgit From 591eee6d0783d4a0d7f0e7c4effdba9cf6c1e2c5 Mon Sep 17 00:00:00 2001 From: Joyce Ooi Date: Mon, 27 Jul 2020 17:46:41 +0800 Subject: MAINTAINERS: Replace Thor Thayer as Altera Triple Speed Ethernet maintainer This patch is to replace Thor Thayer as Altera Triple Speed Ethernet maintainer as he is moving to a different role. Signed-off-by: Joyce Ooi Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f0569cf304ca..9fb232860d9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -782,7 +782,7 @@ F: include/dt-bindings/reset/altr,rst-mgr-a10sr.h F: include/linux/mfd/altera-a10sr.h ALTERA TRIPLE SPEED ETHERNET DRIVER -M: Thor Thayer +M: Joyce Ooi L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/altera/ -- cgit From 65550098c1c4db528400c73acf3e46bfa78d9264 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Jul 2020 00:03:56 +0100 Subject: rxrpc: Fix race between recvmsg and sendmsg on immediate call failure There's a race between rxrpc_sendmsg setting up a call, but then failing to send anything on it due to an error, and recvmsg() seeing the call completion occur and trying to return the state to the user. An assertion fails in rxrpc_recvmsg() because the call has already been released from the socket and is about to be released again as recvmsg deals with it. (The recvmsg_q queue on the socket holds a ref, so there's no problem with use-after-free.) We also have to be careful not to end up reporting an error twice, in such a way that both returns indicate to userspace that the user ID supplied with the call is no longer in use - which could cause the client to malfunction if it recycles the user ID fast enough. Fix this by the following means: (1) When sendmsg() creates a call after the point that the call has been successfully added to the socket, don't return any errors through sendmsg(), but rather complete the call and let recvmsg() retrieve them. Make sendmsg() return 0 at this point. Further calls to sendmsg() for that call will fail with ESHUTDOWN. Note that at this point, we haven't send any packets yet, so the server doesn't yet know about the call. (2) If sendmsg() returns an error when it was expected to create a new call, it means that the user ID wasn't used. (3) Mark the call disconnected before marking it completed to prevent an oops in rxrpc_release_call(). (4) recvmsg() will then retrieve the error and set MSG_EOR to indicate that the user ID is no longer known by the kernel. An oops like the following is produced: kernel BUG at net/rxrpc/recvmsg.c:605! ... RIP: 0010:rxrpc_recvmsg+0x256/0x5ae ... Call Trace: ? __init_waitqueue_head+0x2f/0x2f ____sys_recvmsg+0x8a/0x148 ? import_iovec+0x69/0x9c ? copy_msghdr_from_user+0x5c/0x86 ___sys_recvmsg+0x72/0xaa ? __fget_files+0x22/0x57 ? __fget_light+0x46/0x51 ? fdget+0x9/0x1b do_recvmmsg+0x15e/0x232 ? _raw_spin_unlock+0xa/0xb ? vtime_delta+0xf/0x25 __x64_sys_recvmmsg+0x2c/0x2f do_syscall_64+0x4c/0x78 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 357f5ef64628 ("rxrpc: Call rxrpc_release_call() on error in rxrpc_new_client_call()") Reported-by: syzbot+b54969381df354936d96@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller --- net/rxrpc/call_object.c | 27 +++++++++++++++++++-------- net/rxrpc/conn_object.c | 8 +++++--- net/rxrpc/recvmsg.c | 2 +- net/rxrpc/sendmsg.c | 3 +++ 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f07970207b54..38a46167523f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) - goto error; + goto error_attached_to_socket; trace_rxrpc_call(call->debug_id, rxrpc_call_connected, atomic_read(&call->usage), here, NULL); @@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - ret = -EEXIST; - -error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + RX_CALL_DEAD, -EEXIST); trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(ret)); + atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = -EEXIST"); + return ERR_PTR(-EEXIST); + + /* We got an error, but the call is attached to the socket and is in + * need of release. However, we might now race with recvmsg() when + * completing the call queues it. Return 0 from sys_sendmsg() and + * leave the error to recvmsg() to deal with. + */ +error_attached_to_socket: + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + atomic_read(&call->usage), here, ERR_PTR(ret)); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + _leave(" = c=%08x [err]", call->debug_id); + return call; } /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 19e141eeed17..8cbe0bf20ed5 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; - spin_lock_bh(&conn->params.peer->lock); - hlist_del_rcu(&call->error_link); - spin_unlock_bh(&conn->params.peer->lock); + if (!hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_del_rcu(&call->error_link); + spin_unlock_bh(&call->peer->lock); + } if (rxrpc_is_client_call(call)) return rxrpc_disconnect_client_call(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 490b1927215c..efecc5a8f67d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -620,7 +620,7 @@ try_again: goto error_unlock_call; } - if (msg->msg_name) { + if (msg->msg_name && call->peer) { struct sockaddr_rxrpc *srx = msg->msg_name; size_t len = sizeof(call->peer->srx); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 03a30d014bb6..f3f6da6e4ad2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -681,6 +681,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ + ret = 0; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + goto out_put_unlock; } else { switch (READ_ONCE(call->state)) { case RXRPC_CALL_UNINITIALISED: -- cgit From 82274d075536322368ce710b211c41c37c4740b9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Jul 2020 16:15:07 -0700 Subject: devlink: ignore -EOPNOTSUPP errors on dumpit Number of .dumpit functions try to ignore -EOPNOTSUPP errors. Recent change missed that, and started reporting all errors but -EMSGSIZE back from dumps. This leads to situation like this: $ devlink dev info devlink answers: Operation not supported Dump should not report an error just because the last device to be queried could not provide an answer. To fix this and avoid similar confusion make sure we clear err properly, and not leave it set to an error if we don't terminate the iteration. Fixes: c62c2cfb801b ("net: devlink: don't ignore errors during dumpit") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index dc2b18475956..47f14a2f25fb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1065,7 +1065,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -1266,7 +1268,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -1498,7 +1502,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -3299,7 +3305,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -3569,7 +3577,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -4518,7 +4528,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); mutex_unlock(&devlink->lock); - if (err && err != -EOPNOTSUPP) + if (err == -EOPNOTSUPP) + err = 0; + else if (err) break; idx++; } -- cgit From 366228ed01f6882cc203e3d5b40010dfae0be1c3 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Wed, 29 Jul 2020 11:50:05 +0800 Subject: net: nixge: fix potential memory leak in nixge_probe() If some processes in nixge_probe() fail, free_netdev(dev) needs to be called to aviod a memory leak. Fixes: 87ab207981ec ("net: nixge: Separate ctrl and dma resources") Fixes: abcd3d6fc640 ("net: nixge: Fix error path for obtaining mac address") Reported-by: Hulk Robot Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/ni/nixge.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index d2708a57f2ff..4075f5e59955 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1299,19 +1299,21 @@ static int nixge_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->napi, nixge_poll, NAPI_POLL_WEIGHT); err = nixge_of_get_resources(pdev); if (err) - return err; + goto free_netdev; __nixge_hw_set_mac_address(ndev); priv->tx_irq = platform_get_irq_byname(pdev, "tx"); if (priv->tx_irq < 0) { netdev_err(ndev, "could not find 'tx' irq"); - return priv->tx_irq; + err = priv->tx_irq; + goto free_netdev; } priv->rx_irq = platform_get_irq_byname(pdev, "rx"); if (priv->rx_irq < 0) { netdev_err(ndev, "could not find 'rx' irq"); - return priv->rx_irq; + err = priv->rx_irq; + goto free_netdev; } priv->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; -- cgit From 555a893303872e044fb86f0a5834ce78d41ad2e2 Mon Sep 17 00:00:00 2001 From: Landen Chao Date: Wed, 29 Jul 2020 10:15:17 +0200 Subject: net: ethernet: mtk_eth_soc: fix MTU warnings in recent kernel versions there are warnings about incorrect MTU size like these: eth0: mtu greater than device maximum mtk_soc_eth 1b100000.ethernet eth0: error -22 setting MTU to include DSA overhead Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Fixes: 72579e14a1d3 ("net: dsa: don't fail to probe if we couldn't set the MTU") Fixes: 7a4c53bee332 ("net: report invalid mtu value via netlink extack") Signed-off-by: Landen Chao Signed-off-by: Frank Wunderlich Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 85735d32ecb0..a1c45b39a230 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2891,6 +2891,8 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->irq = eth->irq[0]; eth->netdev[id]->dev.of_node = np; + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + return 0; free_netdev: -- cgit From 51875dad43b44241b46a569493f1e4bfa0386d86 Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Wed, 29 Jul 2020 21:06:59 +0800 Subject: atm: fix atm_dev refcnt leaks in atmtcp_remove_persistent atmtcp_remove_persistent() invokes atm_dev_lookup(), which returns a reference of atm_dev with increased refcount or NULL if fails. The refcount leaks issues occur in two error handling paths. If dev_data->persist is zero or PRIV(dev)->vcc isn't NULL, the function returns 0 without decreasing the refcount kept by a local variable, resulting in refcount leaks. Fix the issue by adding atm_dev_put() before returning 0 both when dev_data->persist is zero or PRIV(dev)->vcc isn't NULL. Signed-off-by: Xin Xiong Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: David S. Miller --- drivers/atm/atmtcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index d9fd70280482..7f814da3c2d0 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -433,9 +433,15 @@ static int atmtcp_remove_persistent(int itf) return -EMEDIUMTYPE; } dev_data = PRIV(dev); - if (!dev_data->persist) return 0; + if (!dev_data->persist) { + atm_dev_put(dev); + return 0; + } dev_data->persist = 0; - if (PRIV(dev)->vcc) return 0; + if (PRIV(dev)->vcc) { + atm_dev_put(dev); + return 0; + } kfree(dev_data); atm_dev_put(dev); atm_dev_deregister(dev); -- cgit From 59929fbb45e06da7d501d3a97f10a91912181f7c Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 29 Jul 2020 10:52:17 -0700 Subject: ionic: unlock queue mutex in error path On an error return, jump to the unlock at the end to be sure to unlock the queue_lock mutex. Fixes: 0925e9db4dc8 ("ionic: use mutex to protect queue operations") Reported-by: kernel test robot Reported-by: Julia Lawall Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 5fd31ba56937..e55d41546cff 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2001,7 +2001,7 @@ int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg) netif_device_detach(lif->netdev); err = ionic_stop(lif->netdev); if (err) - return err; + goto reset_out; } if (cb) @@ -2011,6 +2011,8 @@ int ionic_reset_queues(struct ionic_lif *lif, ionic_reset_cb cb, void *arg) err = ionic_open(lif->netdev); netif_device_attach(lif->netdev); } + +reset_out: mutex_unlock(&lif->queue_lock); return err; -- cgit From 85496a29224188051b6135eb38da8afd4c584765 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 30 Jul 2020 15:30:00 +0800 Subject: net: gemini: Fix missing clk_disable_unprepare() in error path of gemini_ethernet_port_probe() Fix the missing clk_disable_unprepare() before return from gemini_ethernet_port_probe() in the error handling case. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/cortina/gemini.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 8d13ea370db1..66e67b24a887 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2446,6 +2446,7 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) port->reset = devm_reset_control_get_exclusive(dev, NULL); if (IS_ERR(port->reset)) { dev_err(dev, "no reset\n"); + clk_disable_unprepare(port->pclk); return PTR_ERR(port->reset); } reset_control_reset(port->reset); @@ -2501,8 +2502,10 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) IRQF_SHARED, port_names[port->id], port); - if (ret) + if (ret) { + clk_disable_unprepare(port->pclk); return ret; + } ret = register_netdev(netdev); if (!ret) { -- cgit From 01cefbbe2c51fa94d4221ca63d236907c5e19fe3 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 20 Jul 2020 01:36:45 +0000 Subject: net/mlx5e: CT: Support restore ipv6 tunnel Currently the driver restores only IPv4 tunnel headers. Add support for restoring IPv6 tunnel header. Fixes: b8ce90370977 ("net/mlx5e: Restore tunnel metadata on miss") Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 30 +++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index eefeb1cdc2ee..245a99f69641 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -551,19 +551,31 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, } } - tun_dst = tun_rx_dst(enc_opts.key.len); + if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, 0, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + } else { + netdev_dbg(priv->netdev, + "Couldn't restore tunnel, unsupported addr_type: %d\n", + key.enc_control.addr_type); + return false; + } + if (!tun_dst) { - WARN_ON_ONCE(true); + netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); return false; } - ip_tunnel_key_init(&tun_dst->u.tun_info.key, - key.enc_ipv4.src, key.enc_ipv4.dst, - key.enc_ip.tos, key.enc_ip.ttl, - 0, /* label */ - key.enc_tp.src, key.enc_tp.dst, - key32_to_tunnel_id(key.enc_key_id.keyid), - TUNNEL_KEY); + tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; if (enc_opts.key.len) ip_tunnel_info_opts_set(&tun_dst->u.tun_info, -- cgit From 0faddfe6b7953e224a1283f89f671ef6a9ba73de Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 2 Jul 2020 01:06:37 +0000 Subject: net/mlx5e: E-Switch, Add misc bit when misc fields changed for mirroring The modified flow_context fields in FTE must be indicated in modify_enable bitmask. Previously, the misc bit in modify_enable is always set as source vport must be set for each rule. So, when parsing vxlan/gre/geneve/qinq rules, this bit is not set because those are all from the same misc fileds that source vport fields are located at, and we don't need to set the indicator twice. After adding per vport tables for mirroring, misc bit is not set, then firmware syndrome happens. To fix it, set the bit wherever misc fileds are changed. This also makes it unnecessary to check misc fields and set the misc bit accordingly in metadata matching, so here remove it. Besides, flow_source must be specified for uplink because firmware will check it and some actions are only allowed for packets received from uplink. Fixes: 96e326878fa5 ("net/mlx5e: Eswitch, Use per vport tables for mirroring") Signed-off-by: Jianbo Liu Reviewed-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index 951ea26d96bc..e472ed0eacfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -301,6 +301,8 @@ static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); } + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c index 58b13192df23..2805416c32a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -80,6 +80,8 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); } + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 37b176801bcc..038a0f1cecec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -136,6 +136,8 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, be32_to_cpu(enc_keyid.key->keyid)); + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cc8412151ca0..fcedb5bdca9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2356,6 +2356,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, match.key->vlan_priority); *match_level = MLX5_MATCH_L2; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 060354bb211a..d70d6c099582 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -259,9 +259,6 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, mlx5_eswitch_get_vport_metadata_mask()); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); @@ -380,6 +377,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.modify_hdr = attr->modify_hdr; if (split) { + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; fdb = esw_vport_tbl_get(esw, attr); } else { if (attr->chain || attr->prio) -- cgit From 6f7bbad18e8343da6318654f408dea0ccd6efb00 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 3 Jul 2020 02:34:23 +0000 Subject: net/mlx5e: E-Switch, Specify flow_source for rule with no in_port The flow_source must be specified, even for rule without matching source vport, because some actions are only allowed in uplink. Otherwise, rule can't be offloaded and firmware syndrome happens. Fixes: 6fb0701a9cfa ("net/mlx5: E-Switch, Add support for offloading rules with no in_port") Signed-off-by: Jianbo Liu Reviewed-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d70d6c099582..ed75353c56b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -236,6 +236,15 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, return &esw->offloads.vport_reps[idx]; } +static void +mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr && attr->in_rep && attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, @@ -276,10 +285,6 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; } - - if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && - attr->in_rep->vport == MLX5_VPORT_UPLINK) - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; } struct mlx5_flow_handle * @@ -377,9 +382,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, flow_act.modify_hdr = attr->modify_hdr; if (split) { - if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && - attr->in_rep->vport == MLX5_VPORT_UPLINK) - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; fdb = esw_vport_tbl_get(esw, attr); } else { if (attr->chain || attr->prio) @@ -396,6 +398,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } + mlx5_eswitch_set_rule_flow_source(esw, spec, attr); + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); @@ -462,6 +466,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, i++; mlx5_eswitch_set_rule_source_port(esw, spec, attr); + mlx5_eswitch_set_rule_flow_source(esw, spec, attr); if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; -- cgit From e692139e6af339a1495ef401b2d95f7f9d1c7a44 Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Thu, 30 Jul 2020 18:29:41 +0800 Subject: net/mlx5e: fix bpf_prog reference count leaks in mlx5e_alloc_rq The function invokes bpf_prog_inc(), which increases the reference count of a bpf_prog object "rq->xdp_prog" if the object isn't NULL. The refcount leak issues take place in two error handling paths. When either mlx5_wq_ll_create() or mlx5_wq_cyc_create() fails, the function simply returns the error code and forgets to drop the reference count increased earlier, causing a reference count leak of "rq->xdp_prog". Fix this issue by jumping to the error handling path err_rq_wq_destroy while either function fails. Fixes: 422d4c401edd ("net/mlx5e: RX, Split WQ objects for different RQ types") Signed-off-by: Xin Xiong Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 07fdbea7ea13..3b892ec301b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -419,7 +419,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) - return err; + goto err_rq_wq_destroy; rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; @@ -470,7 +470,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - return err; + goto err_rq_wq_destroy; rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; -- cgit From 8c5c51f5cac676e9065cb6de9feaa3a16a462675 Mon Sep 17 00:00:00 2001 From: liujian Date: Thu, 30 Jul 2020 16:14:28 +0800 Subject: net/sched: The error lable position is corrected in ct_init_module Exchange the positions of the err_tbl_init and err_register labels in ct_init_module function. Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone") Signed-off-by: liujian Signed-off-by: David S. Miller --- net/sched/act_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 5928efb6449c..6ed1652d1e26 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1543,10 +1543,10 @@ static int __init ct_init_module(void) return 0; -err_tbl_init: - destroy_workqueue(act_ct_wq); err_register: tcf_ct_flow_tables_uninit(); +err_tbl_init: + destroy_workqueue(act_ct_wq); return err; } -- cgit From bbc8a99e952226c585ac17477a85ef1194501762 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Thu, 30 Jul 2020 15:20:26 -0400 Subject: rds: Prevent kernel-infoleak in rds_notify_queue_get() rds_notify_queue_get() is potentially copying uninitialized kernel stack memory to userspace since the compiler may leave a 4-byte hole at the end of `cmsg`. In 2016 we tried to fix this issue by doing `= { 0 };` on `cmsg`, which unfortunately does not always initialize that 4-byte hole. Fix it by using memset() instead. Cc: stable@vger.kernel.org Fixes: f037590fff30 ("rds: fix a leak of kernel memory") Fixes: bdbe6fbc6a2f ("RDS: recv.c") Suggested-by: Dan Carpenter Signed-off-by: Peilin Ye Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/recv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rds/recv.c b/net/rds/recv.c index c8404971d5ab..aba4afe4dfed 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; - struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ + struct rds_rdma_notify cmsg; unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); int err = 0; + memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ /* put_cmsg copies to user space and thus may sleep. We can't do this * with rs_lock held, so first grab as many notifications as we can stuff -- cgit From fda2ec62cf1aa7cbee52289dc8059cd3662795da Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 1 Aug 2020 07:07:50 +0000 Subject: vxlan: fix memleak of fdb When vxlan interface is deleted, all fdbs are deleted by vxlan_flush(). vxlan_flush() flushes fdbs but it doesn't delete fdb, which contains all-zeros-mac because it is deleted by vxlan_uninit(). But vxlan_uninit() deletes only the fdb, which contains both all-zeros-mac and default vni. So, the fdb, which contains both all-zeros-mac and non-default vni will not be deleted. Test commands: ip link add vxlan0 type vxlan dstport 4789 external ip link set vxlan0 up bridge fdb add to 00:00:00:00:00:00 dst 172.0.0.1 dev vxlan0 via lo \ src_vni 10000 self permanent ip link del vxlan0 kmemleak reports as follows: unreferenced object 0xffff9486b25ced88 (size 96): comm "bridge", pid 2151, jiffies 4294701712 (age 35506.901s) hex dump (first 32 bytes): 02 00 00 00 ac 00 00 01 40 00 09 b1 86 94 ff ff ........@....... 46 02 00 00 00 00 00 00 a7 03 00 00 12 b5 6a 6b F.............jk backtrace: [<00000000c10cf651>] vxlan_fdb_append.part.51+0x3c/0xf0 [vxlan] [<000000006b31a8d9>] vxlan_fdb_create+0x184/0x1a0 [vxlan] [<0000000049399045>] vxlan_fdb_update+0x12f/0x220 [vxlan] [<0000000090b1ef00>] vxlan_fdb_add+0x12a/0x1b0 [vxlan] [<0000000056633c2c>] rtnl_fdb_add+0x187/0x270 [<00000000dd5dfb6b>] rtnetlink_rcv_msg+0x264/0x490 [<00000000fc44dd54>] netlink_rcv_skb+0x4a/0x110 [<00000000dff433e7>] netlink_unicast+0x18e/0x250 [<00000000b87fb421>] netlink_sendmsg+0x2e9/0x400 [<000000002ed55153>] ____sys_sendmsg+0x237/0x260 [<00000000faa51c66>] ___sys_sendmsg+0x88/0xd0 [<000000006c3982f1>] __sys_sendmsg+0x4e/0x80 [<00000000a8f875d2>] do_syscall_64+0x56/0xe0 [<000000003610eefa>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff9486b1c40080 (size 128): comm "bridge", pid 2157, jiffies 4294701754 (age 35506.866s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 f8 dc 42 b2 86 94 ff ff ..........B..... 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk backtrace: [<00000000a2981b60>] vxlan_fdb_create+0x67/0x1a0 [vxlan] [<0000000049399045>] vxlan_fdb_update+0x12f/0x220 [vxlan] [<0000000090b1ef00>] vxlan_fdb_add+0x12a/0x1b0 [vxlan] [<0000000056633c2c>] rtnl_fdb_add+0x187/0x270 [<00000000dd5dfb6b>] rtnetlink_rcv_msg+0x264/0x490 [<00000000fc44dd54>] netlink_rcv_skb+0x4a/0x110 [<00000000dff433e7>] netlink_unicast+0x18e/0x250 [<00000000b87fb421>] netlink_sendmsg+0x2e9/0x400 [<000000002ed55153>] ____sys_sendmsg+0x237/0x260 [<00000000faa51c66>] ___sys_sendmsg+0x88/0xd0 [<000000006c3982f1>] __sys_sendmsg+0x4e/0x80 [<00000000a8f875d2>] do_syscall_64+0x56/0xe0 [<000000003610eefa>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 3ad7a4b141eb ("vxlan: support fdb and learning in COLLECT_METADATA mode") Signed-off-by: Taehee Yoo Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5efe1e28f270..a7c3939264b0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3076,8 +3076,10 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP))) continue; /* the all_zeros_mac entry is deleted at vxlan_uninit */ - if (!is_zero_ether_addr(f->eth_addr)) - vxlan_fdb_destroy(vxlan, f, true, true); + if (is_zero_ether_addr(f->eth_addr) && + f->vni == vxlan->cfg.vni) + continue; + vxlan_fdb_destroy(vxlan, f, true, true); } spin_unlock_bh(&vxlan->hash_lock[h]); } -- cgit