From 050668c10047802a2b62cbf8db834c2c84042b87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 29 Oct 2019 15:51:03 +0100 Subject: bpf, doc: Add Andrii as official reviewer to BPF subsystem Andrii Nakryiko has been part of our weekly BPF patch review rotation for quite some time now and provided excellent and timely feedback on BPF patches, therefore give credit where credit is due and add him officially to the BPF core reviewer team to the MAINTAINERS file. Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/af565dbef3b0b35040f26bfd16ed59cc0bae8066.1572360528.git.daniel@iogearbox.net --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e51a68bf8ca8..808bac0e3847 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3051,6 +3051,7 @@ M: Daniel Borkmann R: Martin KaFai Lau R: Song Liu R: Yonghong Song +R: Andrii Nakryiko L: netdev@vger.kernel.org L: bpf@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git -- cgit From 7541c87c9b7a7e07c84481f37f2c19063b44469b Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 28 Oct 2019 13:29:02 +0100 Subject: bpf: Allow narrow loads of bpf_sysctl fields with offset > 0 "ctx:file_pos sysctl:read read ok narrow" works on s390 by accident: it reads the wrong byte, which happens to have the expected value of 0. Improve the test by seeking to the 4th byte and expecting 4 instead of 0. This makes the latent problem apparent: the test attempts to read the first byte of bpf_sysctl.file_pos, assuming this is the least-significant byte, which is not the case on big-endian machines: a non-zero offset is needed. The point of the test is to verify narrow loads, so we cannot cheat our way out by simply using BPF_W. The existence of the test means that such loads have to be supported, most likely because llvm can generate them. Fix the test by adding a big-endian variant, which uses an offset to access the least-significant byte of bpf_sysctl.file_pos. This reveals the final problem: verifier rejects accesses to bpf_sysctl fields with offset > 0. Such accesses are already allowed for a wide range of structs: __sk_buff, bpf_sock_addr and sk_msg_md to name a few. Extend this support to bpf_sysctl by using bpf_ctx_range instead of offsetof when matching field offsets. Fixes: 7b146cebe30c ("bpf: Sysctl hook") Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx") Fixes: 9a1027e52535 ("selftests/bpf: Test file_pos field in bpf_sysctl ctx") Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Acked-by: Andrey Ignatov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191028122902.9763-1-iii@linux.ibm.com --- kernel/bpf/cgroup.c | 4 ++-- tools/testing/selftests/bpf/test_sysctl.c | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ddd8addcdb5c..a3eaf08e7dd3 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1311,12 +1311,12 @@ static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, return false; switch (off) { - case offsetof(struct bpf_sysctl, write): + case bpf_ctx_range(struct bpf_sysctl, write): if (type != BPF_READ) return false; bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); - case offsetof(struct bpf_sysctl, file_pos): + case bpf_ctx_range(struct bpf_sysctl, file_pos): if (type == BPF_READ) { bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a320e3844b17..7c6e5b173f33 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -161,9 +161,14 @@ static struct sysctl_test tests[] = { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos) + 3), +#endif + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2), /* return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), @@ -176,6 +181,7 @@ static struct sysctl_test tests[] = { .attach_type = BPF_CGROUP_SYSCTL, .sysctl = "kernel/ostype", .open_flags = O_RDONLY, + .seek = 4, .result = SUCCESS, }, { -- cgit From 6bd7cf66578fae18c26d92115058482cc74ca71b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 1 Oct 2019 13:33:06 +0200 Subject: perf tools: Make usage of test_attr__* optional for perf-sys.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For users of perf-sys.h outside perf, e.g. samples/bpf/bpf_load.c, it's convenient not to depend on test_attr__*. After commit 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h"), all users of perf-sys.h will depend on test_attr__enabled and test_attr__open. This commit enables a user to define HAVE_ATTR_TEST to zero in order to omit the test dependency. Fixes: 91854f9a077e ("perf tools: Move everything related to sys_perf_event_open() to perf-sys.h") Signed-off-by: Björn Töpel Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Namhyung Kim Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Link: http://lore.kernel.org/bpf/20191001113307.27796-2-bjorn.topel@gmail.com --- tools/perf/perf-sys.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 63e4349a772a..15e458e150bd 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -15,7 +15,9 @@ void test_attr__init(void); void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags); -#define HAVE_ATTR_TEST +#ifndef HAVE_ATTR_TEST +#define HAVE_ATTR_TEST 1 +#endif static inline int sys_perf_event_open(struct perf_event_attr *attr, @@ -27,7 +29,7 @@ sys_perf_event_open(struct perf_event_attr *attr, fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); -#ifdef HAVE_ATTR_TEST +#if HAVE_ATTR_TEST if (unlikely(test_attr__enabled)) test_attr__open(attr, pid, cpu, fd, group_fd, flags); #endif -- cgit From 04ec044b7d30800296824783df7d9728d16d7567 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 1 Oct 2019 13:33:07 +0200 Subject: samples/bpf: fix build by setting HAVE_ATTR_TEST to zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To remove that test_attr__{enabled/open} are used by perf-sys.h, we set HAVE_ATTR_TEST to zero. Signed-off-by: Björn Töpel Tested-by: KP Singh Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Namhyung Kim Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Link: http://lore.kernel.org/bpf/20191001113307.27796-3-bjorn.topel@gmail.com --- samples/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1d9be26b4edd..42b571cde177 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -176,6 +176,7 @@ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf +KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable -- cgit From ff1c08e1f74b6864854c39be48aa799a6a2e4d2b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 29 Oct 2019 16:43:07 +0100 Subject: bpf: Change size to u64 for bpf_map_{area_alloc, charge_init}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions bpf_map_area_alloc() and bpf_map_charge_init() prior this commit passed the size parameter as size_t. In this commit this is changed to u64. All users of these functions avoid size_t overflows on 32-bit systems, by explicitly using u64 when calculating the allocation size and memory charge cost. However, since the result was narrowed by the size_t when passing size and cost to the functions, the overflow handling was in vain. Instead of changing all call sites to size_t and handle overflow at the call site, the parameter is changed to u64 and checked in the functions above. Fixes: d407bd25a204 ("bpf: don't trigger OOM killer under pressure with map alloc") Fixes: c85d69135a91 ("bpf: move memory size checks to bpf_map_charge_init()") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20191029154307.23053-1-bjorn.topel@gmail.com --- include/linux/bpf.h | 4 ++-- kernel/bpf/syscall.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b9d22338606..3bf3835d0e86 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -656,11 +656,11 @@ void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size); +int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); void bpf_map_charge_finish(struct bpf_map_memory *mem); void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src); -void *bpf_map_area_alloc(size_t size, int numa_node); +void *bpf_map_area_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0937719b87e2..ace1cfaa24b6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -126,7 +126,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } -void *bpf_map_area_alloc(size_t size, int numa_node) +void *bpf_map_area_alloc(u64 size, int numa_node) { /* We really just want to fail instead of triggering OOM killer * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, @@ -141,6 +141,9 @@ void *bpf_map_area_alloc(size_t size, int numa_node) const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; void *area; + if (size >= SIZE_MAX) + return NULL; + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, numa_node); @@ -197,7 +200,7 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) atomic_long_sub(pages, &user->locked_vm); } -int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size) +int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) { u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; struct user_struct *user; -- cgit From 7de086909365cd60a5619a45af3f4152516fd75c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Oct 2019 20:34:44 -0700 Subject: powerpc/bpf: Fix tail call implementation We have seen many crashes on powerpc hosts while loading bpf programs. The problem here is that bpf_int_jit_compile() does a first pass to compute the program length. Then it allocates memory to store the generated program and calls bpf_jit_build_body() a second time (and a third time later) What I have observed is that the second bpf_jit_build_body() could end up using few more words than expected. If bpf_jit_binary_alloc() put the space for the program at the end of the allocated page, we then write on a non mapped memory. It appears that bpf_jit_emit_tail_call() calls bpf_jit_emit_common_epilogue() while ctx->seen might not be stable. Only after the second pass we can be sure ctx->seen wont be changed. Trying to avoid a second pass seems quite complex and probably not worth it. Fixes: ce0761419faef ("powerpc/bpf: Implement support for tail calls") Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Cc: Naveen N. Rao Cc: Sandipan Das Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Link: https://lore.kernel.org/bpf/20191101033444.143741-1-edumazet@google.com --- arch/powerpc/net/bpf_jit_comp64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 02a59946a78a..be3517ef0574 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1141,6 +1141,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto out_addrs; } + /* + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } + /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can -- cgit From de2a60522343a6cab998f61fd906eae445b19963 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Oct 2019 15:07:06 +0100 Subject: netfilter: nf_tables_offload: check for register data length mismatches Make sure register data length does not mismatch immediate data length, otherwise hit EOPNOTSUPP. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 5 +++-- net/netfilter/nft_cmp.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 974300178fa9..02afa752dd2e 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -134,12 +134,13 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, const struct nft_expr *expr) { const struct nft_bitwise *priv = nft_expr_priv(expr); + struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || - priv->sreg != priv->dreg) + priv->sreg != priv->dreg || priv->len != reg->len) return -EOPNOTSUPP; - memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask)); + memcpy(®->mask, &priv->mask, sizeof(priv->mask)); return 0; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index bd173b1824c6..0744b2bb46da 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -116,7 +116,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx, u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; - if (priv->op != NFT_CMP_EQ) + if (priv->op != NFT_CMP_EQ || reg->len != priv->len) return -EOPNOTSUPP; memcpy(key + reg->offset, &priv->data, priv->len); -- cgit From c43eab3eddb4c6742ac20138659a9b701822b274 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Mon, 4 Nov 2019 23:50:00 +0800 Subject: net: fec: add missed clk_disable_unprepare in remove This driver forgets to disable and unprepare clks when remove. Add calls to clk_disable_unprepare to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 22c01b224baa..a9c386b63581 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3645,6 +3645,8 @@ fec_drv_remove(struct platform_device *pdev) regulator_disable(fep->reg_phy); pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); -- cgit From 3d1e5039f5f87a8731202ceca08764ee7cb010d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Nov 2019 07:57:55 -0800 Subject: dccp: do not leak jiffies on the wire For some reason I missed the case of DCCP passive flows in my previous patch. Fixes: a904a0693c18 ("inet: stop leaking jiffies on the wire") Signed-off-by: Eric Dumazet Reported-by: Thiemo Nagel Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0d8f782c25cc..d19557c6d04b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -416,7 +416,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = jiffies; + newinet->inet_id = prandom_u32(); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; -- cgit From 30b7244d79651460ff114ba8f7987ed94c86b99a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 24 Aug 2019 17:49:55 +0300 Subject: netfilter: ipset: Fix an error code in ip_set_sockfn_get() The copy_to_user() function returns the number of bytes remaining to be copied. In this code, that positive return is checked at the end of the function and we return zero/success. What we should do instead is return -EFAULT. Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support") Signed-off-by: Dan Carpenter Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e64d5f9a89dd..e7288eab7512 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2069,8 +2069,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_version->version = IPSET_PROTOCOL; - ret = copy_to_user(user, req_version, - sizeof(struct ip_set_req_version)); + if (copy_to_user(user, req_version, + sizeof(struct ip_set_req_version))) + ret = -EFAULT; goto done; } case IP_SET_OP_GET_BYNAME: { @@ -2129,7 +2130,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } /* end of switch(op) */ copy: - ret = copy_to_user(user, data, copylen); + if (copy_to_user(user, data, copylen)) + ret = -EFAULT; done: vfree(data); -- cgit From 97664bc2c77e2b65cdedddcae2643fc93291d958 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 10 Oct 2019 19:18:14 +0200 Subject: netfilter: ipset: Copy the right MAC address in hash:ip,mac IPv6 sets Same as commit 1b4a75108d5b ("netfilter: ipset: Copy the right MAC address in bitmap:ip,mac and hash:ip,mac sets"), another copy and paste went wrong in commit 8cc4ccf58379 ("netfilter: ipset: Allow matching on destination MAC address for mac and ipmac sets"). When I fixed this for IPv4 in 1b4a75108d5b, I didn't realise that hash:ip,mac sets also support IPv6 as family, and this is covered by a separate function, hash_ipmac6_kadt(). In hash:ip,mac sets, the first dimension is the IP address, and the second dimension is the MAC address: check the IPSET_DIM_TWO_SRC flag in flags while deciding which MAC address to copy, destination or source. This way, mixing source and destination matches for the two dimensions of ip,mac hash type works as expected, also for IPv6. With this setup: ip netns add A ip link add veth1 type veth peer name veth2 netns A ip addr add 2001:db8::1/64 dev veth1 ip -net A addr add 2001:db8::2/64 dev veth2 ip link set veth1 up ip -net A link set veth2 up dst=$(ip netns exec A cat /sys/class/net/veth2/address) ip netns exec A ipset create test_hash hash:ip,mac family inet6 ip netns exec A ipset add test_hash 2001:db8::1,${dst} ip netns exec A ip6tables -A INPUT -p icmpv6 --icmpv6-type 135 -j ACCEPT ip netns exec A ip6tables -A INPUT -m set ! --match-set test_hash src,dst -j DROP ipset now correctly matches a test packet: # ping -c1 2001:db8::2 >/dev/null # echo $? 0 Reported-by: Chen, Yi Fixes: 8cc4ccf58379 ("netfilter: ipset: Allow matching on destination MAC address for mac and ipmac sets") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_ipmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 24d8f4df4230..4ce563eb927d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -209,7 +209,7 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - if (opt->flags & IPSET_DIM_ONE_SRC) + if (opt->flags & IPSET_DIM_TWO_SRC) ether_addr_copy(e.ether, eth_hdr(skb)->h_source); else ether_addr_copy(e.ether, eth_hdr(skb)->h_dest); -- cgit From 1289975643f4cdecb071dc641059a47679fd170f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 1 Nov 2019 17:13:18 +0100 Subject: netfilter: ipset: Fix nla_policies to fully support NL_VALIDATE_STRICT Since v5.2 (commit "netlink: re-add parse/validate functions in strict mode") NL_VALIDATE_STRICT is enabled. Fix the ipset nla_policies which did not support strict mode and convert from deprecated parsings to verified ones. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 41 ++++++++++++++++++++++---------- net/netfilter/ipset/ip_set_hash_net.c | 1 + net/netfilter/ipset/ip_set_hash_netnet.c | 1 + 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e7288eab7512..d73d1828216a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -296,7 +296,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -314,7 +315,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -934,7 +936,8 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && - nla_parse_nested_deprecated(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) { + nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], + set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1281,6 +1284,14 @@ dump_attrs(struct nlmsghdr *nlh) } } +static const struct nla_policy +ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_FLAGS] = { .type = NLA_U32 }, +}; + static int dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { @@ -1292,9 +1303,9 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; int ret; - ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr, - nlh->nlmsg_len - min_len, - ip_set_setname_policy, NULL); + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, + nlh->nlmsg_len - min_len, + ip_set_dump_policy, NULL); if (ret) return ret; @@ -1543,9 +1554,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr, - nlh->nlmsg_len - min_len, - ip_set_adt_policy, NULL); + ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, ip_set_adt_policy, + NULL); if (ret) { nlmsg_free(skb2); @@ -1596,7 +1607,9 @@ static int ip_set_ad(struct net *net, struct sock *ctnl, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, adt, flags, use_lineno); @@ -1606,7 +1619,8 @@ static int ip_set_ad(struct net *net, struct sock *ctnl, nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, adt, flags, use_lineno); @@ -1655,7 +1669,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (!set) return -ENOENT; - if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); @@ -1961,7 +1976,7 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { [IPSET_CMD_LIST] = { .call = ip_set_dump, .attr_count = IPSET_ATTR_CMD_MAX, - .policy = ip_set_setname_policy, + .policy = ip_set_dump_policy, }, [IPSET_CMD_SAVE] = { .call = ip_set_dump, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c259cbc3ef45..3d932de0ad29 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -368,6 +368,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index a3ae69bfee66..4398322fad59 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -476,6 +476,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = { [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, -- cgit From 250367c59e6ba0d79d702a059712d66edacd4a1a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 31 Oct 2019 11:06:24 +0100 Subject: netfilter: nf_tables: Align nft_expr private data to 64-bit Invoking the following commands on a 32-bit architecture with strict alignment requirements (such as an ARMv7-based Raspberry Pi) results in an alignment exception: # nft add table ip test-ip4 # nft add chain ip test-ip4 output { type filter hook output priority 0; } # nft add rule ip test-ip4 output quota 1025 bytes Alignment trap: not handling instruction e1b26f9f at [<7f4473f8>] Unhandled fault: alignment exception (0x001) at 0xb832e824 Internal error: : 1 [#1] PREEMPT SMP ARM Hardware name: BCM2835 [<7f4473fc>] (nft_quota_do_init [nft_quota]) [<7f447448>] (nft_quota_init [nft_quota]) [<7f4260d0>] (nf_tables_newrule [nf_tables]) [<7f4168dc>] (nfnetlink_rcv_batch [nfnetlink]) [<7f416bd0>] (nfnetlink_rcv [nfnetlink]) [<8078b334>] (netlink_unicast) [<8078b664>] (netlink_sendmsg) [<8071b47c>] (sock_sendmsg) [<8071bd18>] (___sys_sendmsg) [<8071ce3c>] (__sys_sendmsg) [<8071ce94>] (sys_sendmsg) The reason is that nft_quota_do_init() calls atomic64_set() on an atomic64_t which is only aligned to 32-bit, not 64-bit, because it succeeds struct nft_expr in memory which only contains a 32-bit pointer. Fix by aligning the nft_expr private data to 64-bit. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 001d294edf57..2d0275f13bbf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -820,7 +820,8 @@ struct nft_expr_ops { */ struct nft_expr { const struct nft_expr_ops *ops; - unsigned char data[]; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_expr_priv(const struct nft_expr *expr) -- cgit From 9fedd894b4e1c7ad5e5f711899f6a0a1da01d996 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Sat, 2 Nov 2019 21:59:44 +0100 Subject: netfilter: nf_tables: fix unexpected EOPNOTSUPP error If the object type doesn't implement an update operation and the user tries to update it will silently ignore the update operation. Fixes: aa4095a156b5 ("netfilter: nf_tables: fix possible null-pointer dereference in object update") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d481f9baca2f..aa26841ad9a1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5143,9 +5143,6 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, struct nft_trans *trans; int err; - if (!obj->ops->update) - return -EOPNOTSUPP; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) @@ -6499,7 +6496,8 @@ static void nft_obj_commit_update(struct nft_trans *trans) obj = nft_trans_obj(trans); newobj = nft_trans_obj_newobj(trans); - obj->ops->update(obj, newobj); + if (obj->ops->update) + obj->ops->update(obj, newobj); kfree(newobj); } -- cgit From b23c0742c2ce7e33ed79d10e451f70fdb5ca85d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Nov 2019 20:54:28 +0100 Subject: bridge: ebtables: don't crash when using dnat target in output chains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xt_in() returns NULL in the output hook, skip the pkt_type change for that case, redirection only makes sense in broute/prerouting hooks. Reported-by: Tom Yan Cc: Linus Lüssing Fixes: cf3cb246e277d ("bridge: ebtables: fix reception of frames DNAT-ed to bridge device/port") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_dnat.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index ed91ea31978a..12a4f4d93681 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -20,7 +20,6 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - struct net_device *dev; if (skb_ensure_writable(skb, ETH_ALEN)) return EBT_DROP; @@ -33,10 +32,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) else skb->pkt_type = PACKET_MULTICAST; } else { - if (xt_hooknum(par) != NF_BR_BROUTING) - dev = br_port_get_rcu(xt_in(par))->br->dev; - else + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: dev = xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; -- cgit From 1ed012f6fd83e7ee7efd22e2c32f23efff015b30 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:52:41 +0100 Subject: netfilter: nf_tables: bogus EOPNOTSUPP on basechain update Userspace never includes the NFT_BASE_CHAIN flag, this flag is inferred from the NFTA_CHAIN_HOOK atribute. The chain update path does not allow to update flags at this stage, the existing sanity check bogusly hits EOPNOTSUPP in the basechain case if the offload flag is set on. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index aa26841ad9a1..712a428509ad 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1922,6 +1922,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + flags |= chain->flags & NFT_BASE_CHAIN; return nf_tables_updchain(&ctx, genmask, policy, flags); } -- cgit From 88c749840dff58e7a40e18bf9bdace15f27ef259 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Nov 2019 14:52:42 +0100 Subject: netfilter: nf_tables_offload: skip EBUSY on chain update Do not try to bind a chain again if it exists, otherwise the driver returns EBUSY. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index ad783f4840ef..e25dab8128db 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -334,7 +334,8 @@ int nft_flow_rule_offload_commit(struct net *net) switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); -- cgit From db9ee384f6f71f7c5296ce85b7c1a2a2527e7c72 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Sat, 28 Sep 2019 22:29:05 +0800 Subject: can: dev: add missing of_node_put() after calling of_get_child_by_name() of_node_put() needs to be called when the device node which is got from of_get_child_by_name() finished using. Fixes: 2290aefa2e90 ("can: dev: Add support for limiting configured bitrate") Cc: Franklin S Cooper Jr Signed-off-by: Wen Yang Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index ac86be52b461..1c88c361938c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -848,6 +848,7 @@ void of_can_transceiver(struct net_device *dev) return; ret = of_property_read_u32(dn, "max-bitrate", &priv->bitrate_max); + of_node_put(dn); if ((ret && ret != -EINVAL) || (!ret && !priv->bitrate_max)) netdev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit.\n"); } -- cgit From fb5be6a7b4863ecc44963bb80ca614584b6c7817 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Thu, 19 Sep 2019 21:44:38 -0500 Subject: can: gs_usb: gs_can_open(): prevent memory leak In gs_can_open() if usb_submit_urb() fails the allocated urb should be released. Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Cc: linux-stable Signed-off-by: Navid Emamdoost Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index bd6eb9967630..2f74f6704c12 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -623,6 +623,7 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_urb(urb); break; } -- cgit From 4d6636498c41891d0482a914dd570343a838ad79 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Oct 2019 12:29:13 +0200 Subject: can: mcba_usb: fix use-after-free on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver was accessing its driver data after having freed it. Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Cc: stable # 4.12 Cc: Remigiusz Kołłątaj Reported-by: syzbot+e29b17e5042bbc56fae9@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 19a702ac49e4..21faa2ec4632 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -876,9 +876,8 @@ static void mcba_usb_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_candev(priv->netdev); - free_candev(priv->netdev); - mcba_urb_unlink(priv); + free_candev(priv->netdev); } static struct usb_driver mcba_usb_driver = { -- cgit From 3759739426186a924675651b388d1c3963c5710e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Oct 2019 12:29:14 +0200 Subject: can: usb_8dev: fix use-after-free on disconnect The driver was accessing its driver data after having freed it. Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Cc: stable # 3.9 Cc: Bernd Krumboeck Cc: Wolfgang Grandegger Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index d596a2ad7f78..8fa224b28218 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -996,9 +996,8 @@ static void usb_8dev_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_netdev(priv->netdev); - free_candev(priv->netdev); - unlink_all_urbs(priv); + free_candev(priv->netdev); } } -- cgit From 5e269324db5adb2f5f6ec9a93a9c7b0672932b47 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Thu, 15 Aug 2019 08:00:26 +0000 Subject: can: flexcan: disable completely the ECC mechanism The ECC (memory error detection and correction) mechanism can be activated or not, controlled by the ECCDIS bit in CAN_MECR. When disabled, updates on indications and reporting registers are stopped. So if want to disable ECC completely, had better assert ECCDIS bit, not just mask the related interrupts. Fixes: cdce844865be ("can: flexcan: add vf610 support for FlexCAN") Signed-off-by: Joakim Zhang Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index dc5695dffc2e..1cd5179cb876 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1188,6 +1188,7 @@ static int flexcan_chip_start(struct net_device *dev) reg_mecr = priv->read(®s->mecr); reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; priv->write(reg_mecr, ®s->mecr); + reg_mecr |= FLEXCAN_MECR_ECCDIS; reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK | FLEXCAN_MECR_FANCEI_MSK); priv->write(reg_mecr, ®s->mecr); -- cgit From de280f403f2996679e2607384980703710576fed Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Tue, 8 Oct 2019 10:35:44 +0200 Subject: can: peak_usb: fix a potential out-of-sync while decoding packets When decoding a buffer received from PCAN-USB, the first timestamp read in a packet is a 16-bit coded time base, and the next ones are an 8-bit offset to this base, regardless of the type of packet read. This patch corrects a potential loss of synchronization by using a timestamp index read from the buffer, rather than an index of received data packets, to determine on the sizeof the timestamp to be read from the packet being decoded. Signed-off-by: Stephane Grosjean Fixes: 46be265d3388 ("can: usb: PEAK-System Technik PCAN-USB specific part") Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 617da295b6c1..5a66c9f53aae 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -100,7 +100,7 @@ struct pcan_usb_msg_context { u8 *end; u8 rec_cnt; u8 rec_idx; - u8 rec_data_idx; + u8 rec_ts_idx; struct net_device *netdev; struct pcan_usb *pdev; }; @@ -547,10 +547,15 @@ static int pcan_usb_decode_status(struct pcan_usb_msg_context *mc, mc->ptr += PCAN_USB_CMD_ARGS; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { - int err = pcan_usb_decode_ts(mc, !mc->rec_idx); + int err = pcan_usb_decode_ts(mc, !mc->rec_ts_idx); if (err) return err; + + /* Next packet in the buffer will have a timestamp on a single + * byte + */ + mc->rec_ts_idx++; } switch (f) { @@ -632,10 +637,13 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) cf->can_dlc = get_can_dlc(rec_len); - /* first data packet timestamp is a word */ - if (pcan_usb_decode_ts(mc, !mc->rec_data_idx)) + /* Only first packet timestamp is a word */ + if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx)) goto decode_failed; + /* Next packet in the buffer will have a timestamp on a single byte */ + mc->rec_ts_idx++; + /* read data */ memset(cf->data, 0x0, sizeof(cf->data)); if (status_len & PCAN_USB_STATUSLEN_RTR) { @@ -688,7 +696,6 @@ static int pcan_usb_decode_msg(struct peak_usb_device *dev, u8 *ibuf, u32 lbuf) /* handle normal can frames here */ } else { err = pcan_usb_decode_data(&mc, sl); - mc.rec_data_idx++; } } -- cgit From f7a1337f0d29b98733c8824e165fca3371d7d4fd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 23 Oct 2019 10:27:05 +0200 Subject: can: peak_usb: fix slab info leak Fix a small slab info leak due to a failure to clear the command buffer at allocation. The first 16 bytes of the command buffer are always sent to the device in pcan_usb_send_cmd() even though only the first two may have been initialised in case no argument payload is provided (e.g. when waiting for a response). Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Cc: stable # 3.4 Reported-by: syzbot+863724e7128e14b26732@syzkaller.appspotmail.com Signed-off-by: Johan Hovold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 65dce642b86b..0b7766b715fd 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -750,7 +750,7 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, dev = netdev_priv(netdev); /* allocate a buffer large enough to send commands */ - dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); + dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; goto lbl_free_candev; -- cgit From 128a1b87d3ceb2ba449d5aadb222fe22395adeb0 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Wed, 25 Sep 2019 08:58:45 +0000 Subject: can: peak_usb: report bus recovery as well While the state changes are reported when the error counters increase and decrease, there is no event when the bus recovers and the error counters decrease again. So add those as well. Change the state going downward to be ERROR_PASSIVE -> ERROR_WARNING -> ERROR_ACTIVE instead of directly to ERROR_ACTIVE again. Signed-off-by: Jeroen Hofstee Cc: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 5a66c9f53aae..d2539c95adb6 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -436,8 +436,8 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) { /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + new_state = CAN_STATE_ERROR_ACTIVE; + break; } break; @@ -460,9 +460,9 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) { - /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + /* no error (back to warning state) */ + new_state = CAN_STATE_ERROR_WARNING; + break; } break; @@ -501,6 +501,11 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, mc->pdev->dev.can.can_stats.error_warning++; break; + case CAN_STATE_ERROR_ACTIVE: + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + break; + default: /* CAN_STATE_MAX (trick to handle other errors) */ cf->can_id |= CAN_ERR_CRTL; -- cgit From 3cb3eaac52c0f145d895f4b6c22834d5f02b8569 Mon Sep 17 00:00:00 2001 From: Kurt Van Dijck Date: Tue, 1 Oct 2019 09:40:36 +0200 Subject: can: c_can: c_can_poll(): only read status register after status IRQ When the status register is read without the status IRQ pending, the chip may not raise the interrupt line for an upcoming status interrupt and the driver may miss a status interrupt. It is critical that the BUSOFF status interrupt is forwarded to the higher layers, since no more interrupts will follow without intervention. Thanks to Wolfgang and Joe for bringing up the first idea. Signed-off-by: Kurt Van Dijck Cc: Wolfgang Grandegger Cc: Joe Burmeister Fixes: fa39b54ccf28 ("can: c_can: Get rid of pointless interrupts") Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 25 ++++++++++++++++++++----- drivers/net/can/c_can/c_can.h | 1 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 606b7d8ffe13..9b61bfbea6cd 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -97,6 +97,9 @@ #define BTR_TSEG2_SHIFT 12 #define BTR_TSEG2_MASK (0x7 << BTR_TSEG2_SHIFT) +/* interrupt register */ +#define INT_STS_PENDING 0x8000 + /* brp extension register */ #define BRP_EXT_BRPE_MASK 0x0f #define BRP_EXT_BRPE_SHIFT 0 @@ -1029,10 +1032,16 @@ static int c_can_poll(struct napi_struct *napi, int quota) u16 curr, last = priv->last_status; int work_done = 0; - priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); - /* Ack status on C_CAN. D_CAN is self clearing */ - if (priv->type != BOSCH_D_CAN) - priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + /* Only read the status register if a status interrupt was pending */ + if (atomic_xchg(&priv->sie_pending, 0)) { + priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); + /* Ack status on C_CAN. D_CAN is self clearing */ + if (priv->type != BOSCH_D_CAN) + priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + } else { + /* no change detected ... */ + curr = last; + } /* handle state changes */ if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) { @@ -1083,10 +1092,16 @@ static irqreturn_t c_can_isr(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; struct c_can_priv *priv = netdev_priv(dev); + int reg_int; - if (!priv->read_reg(priv, C_CAN_INT_REG)) + reg_int = priv->read_reg(priv, C_CAN_INT_REG); + if (!reg_int) return IRQ_NONE; + /* save for later use */ + if (reg_int & INT_STS_PENDING) + atomic_set(&priv->sie_pending, 1); + /* disable all interrupts and schedule the NAPI */ c_can_irq_control(priv, false); napi_schedule(&priv->napi); diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 8acdc7fa4792..d5567a7c1c6d 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -198,6 +198,7 @@ struct c_can_priv { struct net_device *dev; struct device *device; atomic_t tx_active; + atomic_t sie_pending; unsigned long tx_dir; int last_status; u16 (*read_reg) (const struct c_can_priv *priv, enum reg index); -- cgit From 23c5a9488f076bab336177cd1d1a366bd8ddf087 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 1 Oct 2019 21:01:20 +0000 Subject: can: c_can: D_CAN: c_can_chip_config(): perform a sofware reset on open When the CAN interface is closed it the hardwre is put in power down mode, but does not reset the error counters / state. Reset the D_CAN on open, so the reported state and the actual state match. According to [1], the C_CAN module doesn't have the software reset. [1] http://www.bosch-semiconductors.com/media/ip_modules/pdf_2/c_can_fd8/users_manual_c_can_fd8_r210_1.pdf Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 9b61bfbea6cd..24c6015f6c92 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -52,6 +52,7 @@ #define CONTROL_EX_PDR BIT(8) /* control register */ +#define CONTROL_SWR BIT(15) #define CONTROL_TEST BIT(7) #define CONTROL_CCE BIT(6) #define CONTROL_DISABLE_AR BIT(5) @@ -572,6 +573,26 @@ static void c_can_configure_msg_objects(struct net_device *dev) IF_MCONT_RCV_EOB); } +static int c_can_software_reset(struct net_device *dev) +{ + struct c_can_priv *priv = netdev_priv(dev); + int retry = 0; + + if (priv->type != BOSCH_D_CAN) + return 0; + + priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_SWR | CONTROL_INIT); + while (priv->read_reg(priv, C_CAN_CTRL_REG) & CONTROL_SWR) { + msleep(20); + if (retry++ > 100) { + netdev_err(dev, "CCTRL: software reset failed\n"); + return -EIO; + } + } + + return 0; +} + /* * Configure C_CAN chip: * - enable/disable auto-retransmission @@ -581,6 +602,11 @@ static void c_can_configure_msg_objects(struct net_device *dev) static int c_can_chip_config(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); + int err; + + err = c_can_software_reset(dev); + if (err) + return err; /* enable automatic retransmission */ priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR); -- cgit From 6f12001ad5e79d0a0b08c599731d45c34cafd376 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 1 Oct 2019 21:01:24 +0000 Subject: can: c_can: C_CAN: add bus recovery events While the state is updated when the error counters increase and decrease, there is no event when the bus recovers and the error counters decrease again. So add that event as well. Change the state going downward to be ERROR_PASSIVE -> ERROR_WARNING -> ERROR_ACTIVE instead of directly to ERROR_ACTIVE again. Signed-off-by: Jeroen Hofstee Acked-by: Kurt Van Dijck Tested-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 24c6015f6c92..8e9f5620c9a2 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -915,6 +915,9 @@ static int c_can_handle_state_change(struct net_device *dev, struct can_berr_counter bec; switch (error_type) { + case C_CAN_NO_ERROR: + priv->can.state = CAN_STATE_ERROR_ACTIVE; + break; case C_CAN_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; @@ -945,6 +948,13 @@ static int c_can_handle_state_change(struct net_device *dev, ERR_CNT_RP_SHIFT; switch (error_type) { + case C_CAN_NO_ERROR: + /* error warning state */ + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + break; case C_CAN_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; @@ -1089,11 +1099,17 @@ static int c_can_poll(struct napi_struct *napi, int quota) /* handle bus recovery events */ if ((!(curr & STATUS_BOFF)) && (last & STATUS_BOFF)) { netdev_dbg(dev, "left bus off state\n"); - priv->can.state = CAN_STATE_ERROR_ACTIVE; + work_done += c_can_handle_state_change(dev, C_CAN_ERROR_PASSIVE); } + if ((!(curr & STATUS_EPASS)) && (last & STATUS_EPASS)) { netdev_dbg(dev, "left error passive state\n"); - priv->can.state = CAN_STATE_ERROR_ACTIVE; + work_done += c_can_handle_state_change(dev, C_CAN_ERROR_WARNING); + } + + if ((!(curr & STATUS_EWARN)) && (last & STATUS_EWARN)) { + netdev_dbg(dev, "left error warning state\n"); + work_done += c_can_handle_state_change(dev, C_CAN_NO_ERROR); } /* handle lec errors on the bus */ -- cgit From 659680bc232ff29cd6aea8df58115775ac365565 Mon Sep 17 00:00:00 2001 From: Appana Durga Kedareswara rao Date: Wed, 9 Oct 2019 12:59:47 +0530 Subject: can: xilinx_can: Fix flags field initialization for axi can AXI CANIP doesn't support tx fifo empty interrupt feature(TXFEMP), update the flags filed in the driver for AXI CAN case accordingly. Fixes: 3281b380ec9f ("can: xilinx_can: Fix flags field initialization for axi can and canps") Reported-by: Anssi Hannula Signed-off-by: Appana Durga Kedareswara rao Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 911b34316c9d..7c482b2d78d2 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1599,7 +1599,6 @@ static const struct xcan_devtype_data xcan_zynq_data = { static const struct xcan_devtype_data xcan_axi_data = { .cantype = XAXI_CAN, - .flags = XCAN_FLAG_TXFEMP, .bittiming_const = &xcan_bittiming_const, .btr_ts2_shift = XCAN_BTR_TS2_SHIFT, .btr_sjw_shift = XCAN_BTR_SJW_SHIFT, -- cgit From ca913f1ac024559ebc17f0b599af262f0ad997c9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:48:48 +0200 Subject: can: rx-offload: can_rx_offload_queue_sorted(): fix error handling, avoid skb mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the rx-offload skb_queue is full can_rx_offload_queue_sorted() will not queue the skb and return with an error. None of the callers of this function, issue a kfree_skb() to free the not queued skb. This results in a memory leak. This patch fixes the problem by freeing the skb in case of a full queue. The return value is adjusted to -ENOBUFS to better reflect the actual problem. The device stats handling is left to the callers, as this function might be used in both the rx and tx path. Fixes: 55059f2b7f86 ("can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions") Cc: linux-stable Cc: Martin Hundebøll Reported-by: Martin Hundebøll Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index e6a668ee7730..663697439d1c 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -207,8 +207,10 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, unsigned long flags; if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } cb = can_rx_offload_get_cb(skb); cb->timestamp = timestamp; -- cgit From 6caf8a6d6586d44fd72f4aa1021d14aa82affafb Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:48:48 +0200 Subject: can: rx-offload: can_rx_offload_queue_tail(): fix error handling, avoid skb mem leak If the rx-offload skb_queue is full can_rx_offload_queue_tail() will not queue the skb and return with an error. This patch frees the skb in case of a full queue, which brings can_rx_offload_queue_tail() in line with the can_rx_offload_queue_sorted() function, which has been adjusted in the previous patch. The return value is adjusted to -ENOBUFS to better reflect the actual problem. The device stats handling is left to the caller. Fixes: d254586c3453 ("can: rx-offload: Add support for HW fifo based irq offloading") Reported-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 663697439d1c..d1c863409945 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -252,8 +252,10 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } skb_queue_tail(&offload->skb_queue, skb); can_rx_offload_schedule(offload); -- cgit From a2dc3f5e1022a5ede8af9ab89a144f1e69db8636 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 16:03:18 +0200 Subject: can: rx-offload: can_rx_offload_offload_one(): do not increase the skb_queue beyond skb_queue_len_max The skb_queue is a linked list, holding the skb to be processed in the next NAPI call. Without this patch, the queue length in can_rx_offload_offload_one() is limited to skb_queue_len_max + 1. As the skb_queue is a linked list, no array or other resources are accessed out-of-bound, however this behaviour is counterintuitive. This patch limits the rx-offload skb_queue length to skb_queue_len_max. Fixes: d254586c3453 ("can: rx-offload: Add support for HW fifo based irq offloading") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index d1c863409945..bdc27481b57f 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -115,7 +115,7 @@ static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload int ret; /* If queue is full or skb not available, read to discard mailbox */ - if (likely(skb_queue_len(&offload->skb_queue) <= + if (likely(skb_queue_len(&offload->skb_queue) < offload->skb_queue_len_max)) skb = alloc_can_skb(offload->dev, &cf); -- cgit From 4e9016bee3bf0c24963097edace034ff205b565c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 15:15:07 +0200 Subject: can: rx-offload: can_rx_offload_offload_one(): increment rx_fifo_errors on queue overflow or OOM If the rx-offload skb_queue is full or the skb allocation fails (due to OOM), the mailbox contents is discarded. This patch adds the incrementing of the rx_fifo_errors statistics counter. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index bdc27481b57f..e224530a0630 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -125,8 +125,10 @@ static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload ret = offload->mailbox_read(offload, &cf_overflow, ×tamp, n); - if (ret) + if (ret) { offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + } return NULL; } -- cgit From d763ab3044f0bf50bd0e6179f6b2cf1c125d1d94 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 9 Oct 2019 21:00:32 +0200 Subject: can: rx-offload: can_rx_offload_offload_one(): use ERR_PTR() to propagate error value in case of errors Before this patch can_rx_offload_offload_one() returns a pointer to a skb containing the read CAN frame or a NULL pointer. However the meaning of the NULL pointer is ambiguous, it can either mean the requested mailbox is empty or there was an error. This patch fixes this situation by returning: - pointer to skb on success - NULL pointer if mailbox is empty - ERR_PTR() in case of an error All users of can_rx_offload_offload_one() have been adopted, no functional change intended. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 86 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index e224530a0630..3f5e040f0c71 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -107,39 +107,95 @@ static int can_rx_offload_compare(struct sk_buff *a, struct sk_buff *b) return cb_b->timestamp - cb_a->timestamp; } -static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) +/** + * can_rx_offload_offload_one() - Read one CAN frame from HW + * @offload: pointer to rx_offload context + * @n: number of mailbox to read + * + * The task of this function is to read a CAN frame from mailbox @n + * from the device and return the mailbox's content as a struct + * sk_buff. + * + * If the struct can_rx_offload::skb_queue exceeds the maximal queue + * length (struct can_rx_offload::skb_queue_len_max) or no skb can be + * allocated, the mailbox contents is discarded by reading it into an + * overflow buffer. This way the mailbox is marked as free by the + * driver. + * + * Return: A pointer to skb containing the CAN frame on success. + * + * NULL if the mailbox @n is empty. + * + * ERR_PTR() in case of an error + */ +static struct sk_buff * +can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) { - struct sk_buff *skb = NULL; + struct sk_buff *skb = NULL, *skb_error = NULL; struct can_rx_offload_cb *cb; struct can_frame *cf; int ret; - /* If queue is full or skb not available, read to discard mailbox */ if (likely(skb_queue_len(&offload->skb_queue) < - offload->skb_queue_len_max)) + offload->skb_queue_len_max)) { skb = alloc_can_skb(offload->dev, &cf); + if (unlikely(!skb)) + skb_error = ERR_PTR(-ENOMEM); /* skb alloc failed */ + } else { + skb_error = ERR_PTR(-ENOBUFS); /* skb_queue is full */ + } - if (!skb) { + /* If queue is full or skb not available, drop by reading into + * overflow buffer. + */ + if (unlikely(skb_error)) { struct can_frame cf_overflow; u32 timestamp; ret = offload->mailbox_read(offload, &cf_overflow, ×tamp, n); - if (ret) { - offload->dev->stats.rx_dropped++; - offload->dev->stats.rx_fifo_errors++; - } - return NULL; + /* Mailbox was empty. */ + if (unlikely(!ret)) + return NULL; + + /* Mailbox has been read and we're dropping it or + * there was a problem reading the mailbox. + * + * Increment error counters in any case. + */ + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + /* There was a problem reading the mailbox, propagate + * error value. + */ + if (unlikely(ret < 0)) + return ERR_PTR(ret); + + return skb_error; } cb = can_rx_offload_get_cb(skb); ret = offload->mailbox_read(offload, cf, &cb->timestamp, n); - if (!ret) { + + /* Mailbox was empty. */ + if (unlikely(!ret)) { kfree_skb(skb); return NULL; } + /* There was a problem reading the mailbox, propagate error value. */ + if (unlikely(ret < 0)) { + kfree_skb(skb); + + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + return ERR_PTR(ret); + } + + /* Mailbox was read. */ return skb; } @@ -159,7 +215,7 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen continue; skb = can_rx_offload_offload_one(offload, i); - if (!skb) + if (IS_ERR_OR_NULL(skb)) break; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); @@ -190,7 +246,11 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) struct sk_buff *skb; int received = 0; - while ((skb = can_rx_offload_offload_one(offload, 0))) { + while (1) { + skb = can_rx_offload_offload_one(offload, 0); + if (IS_ERR_OR_NULL(skb)) + break; + skb_queue_tail(&offload->skb_queue, skb); received++; } -- cgit From c2a9f74c9d18acfdcabd3361adc7eac82c537a66 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:38 +0000 Subject: can: rx-offload: can_rx_offload_irq_offload_timestamp(): continue on error In case of a resource shortage, i.e. the rx_offload queue will overflow or a skb fails to be allocated (due to OOM), can_rx_offload_offload_one() will call mailbox_read() to discard the mailbox and return an ERR_PTR. However can_rx_offload_irq_offload_timestamp() bails out in the error case. In case of a resource shortage all mailboxes should be discarded, to avoid an IRQ storm and give the system some time to recover. Since can_rx_offload_irq_offload_timestamp() is typically called from a while loop, all message will eventually be discarded. So let's continue on error instead to discard them directly. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 3f5e040f0c71..2ea8676579a9 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -216,7 +216,7 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen skb = can_rx_offload_offload_one(offload, i); if (IS_ERR_OR_NULL(skb)) - break; + continue; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); } -- cgit From 1f7f504dcd9d1262437bdcf4fa071e41dec1af03 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 24 Sep 2019 18:45:38 +0000 Subject: can: rx-offload: can_rx_offload_irq_offload_fifo(): continue on error In case of a resource shortage, i.e. the rx_offload queue will overflow or a skb fails to be allocated (due to OOM), can_rx_offload_offload_one() will call mailbox_read() to discard the mailbox and return an ERR_PTR. If the hardware FIFO is empty can_rx_offload_offload_one() will return NULL. In case a CAN frame was read from the hardware, can_rx_offload_offload_one() returns the skb containing it. Without this patch can_rx_offload_irq_offload_fifo() bails out if no skb returned, regardless of the reason. Similar to can_rx_offload_irq_offload_timestamp() in case of a resource shortage the whole FIFO should be discarded, to avoid an IRQ storm and give the system some time to recover. However if the FIFO is empty the loop can be left. With this patch the loop is left in case of empty FIFO, but not on errors. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 2ea8676579a9..84cae167e42f 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -248,7 +248,9 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) while (1) { skb = can_rx_offload_offload_one(offload, 0); - if (IS_ERR_OR_NULL(skb)) + if (IS_ERR(skb)) + continue; + if (!skb) break; skb_queue_tail(&offload->skb_queue, skb); -- cgit From 758124335a9dd649ab820bfb5b328170919ee7dc Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 15 Jul 2019 20:53:08 +0200 Subject: can: flexcan: increase error counters if skb enqueueing via can_rx_offload_queue_sorted() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to can_rx_offload_queue_sorted() may fail and return an error (in the current implementation due to resource shortage). The passed skb is consumed. This patch adds incrementing of the appropriate error counters to let the device statistics reflect that there's a problem. Reported-by: Martin Hundebøll Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 1cd5179cb876..57f9a2f51085 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -677,6 +677,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) struct can_frame *cf; bool rx_errors = false, tx_errors = false; u32 timestamp; + int err; timestamp = priv->read(®s->timer) << 16; @@ -725,7 +726,9 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) if (tx_errors) dev->stats.tx_errors++; - can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + if (err) + dev->stats.rx_fifo_errors++; } static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) @@ -738,6 +741,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) int flt; struct can_berr_counter bec; u32 timestamp; + int err; timestamp = priv->read(®s->timer) << 16; @@ -769,7 +773,9 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + if (err) + dev->stats.rx_fifo_errors++; } static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) -- cgit From c4409e9fbea954fdae7927205283dfc3ed8e2d6e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 15 Jul 2019 20:53:08 +0200 Subject: can: ti_hecc: ti_hecc_error(): increase error counters if skb enqueueing via can_rx_offload_queue_sorted() fails The call to can_rx_offload_queue_sorted() may fail and return an error (in the current implementation due to resource shortage). The passed skb is consumed. This patch adds incrementing of the appropriate error counters to let the device statistics reflect that there's a problem. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index f8b19eef5d26..91188e6d4f78 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -558,6 +558,7 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, struct can_frame *cf; struct sk_buff *skb; u32 timestamp; + int err; /* propagate the error condition to the can stack */ skb = alloc_can_err_skb(ndev, &cf); @@ -639,7 +640,9 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, } timestamp = hecc_read(priv, HECC_CANLNT); - can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); + if (err) + ndev->stats.rx_fifo_errors++; return 0; } -- cgit From 59f415c2f5e20a6859e49626e8af4de983ff111c Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:52 +0000 Subject: can: ti_hecc: ti_hecc_stop(): stop the CPK on down When the interface goes down, the CPK should no longer take an active part in the CAN-bus communication, like sending acks and error frames. So enable configuration mode in ti_hecc_stop, so the CPK is no longer active. When a transceiver switch is present the acks and errors don't make it to the bus, but disabling the CPK then does prevent oddities, like ti_hecc_reset() failing, since the CPK can become bus-off and starts counting the 11 bit recessive bits, which seems to block the reset. It can also cause invalid interrupts and disrupt the CAN-bus, since transmission can be stopped in the middle of a message, by disabling the tranceiver while the CPK is sending. Since the CPK is disabled after normal power on, it is typically only seen when the interface is restarted. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 91188e6d4f78..eb8151154083 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -400,6 +400,9 @@ static void ti_hecc_stop(struct net_device *ndev) { struct ti_hecc_priv *priv = netdev_priv(ndev); + /* Disable the CPK; stop sending, erroring and acking */ + hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_CCR); + /* Disable interrupts and disable mailboxes */ hecc_write(priv, HECC_CANGIM, 0); hecc_write(priv, HECC_CANMIM, 0); -- cgit From 10f5d55ddcbf1c30529d90beffedcf84844d6f42 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:56 +0000 Subject: can: ti_hecc: keep MIM and MD set The HECC_CANMIM is set in the xmit path and cleared in the interrupt. Since this is done with a read, modify, write action the register might end up with some more MIM enabled then intended, since it is not protected. That doesn't matter at all, since the tx interrupt disables the mailbox with HECC_CANME (while holding a spinlock). So lets just always keep MIM set. While at it, since the mailbox direction never changes, don't set it every time a message is send, ti_hecc_reset() already sets them to tx. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index eb8151154083..d6a84f8ff863 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -382,6 +382,9 @@ static void ti_hecc_start(struct net_device *ndev) hecc_set_bit(priv, HECC_CANMIM, mbx_mask); } + /* Enable tx interrupts */ + hecc_set_bit(priv, HECC_CANMIM, BIT(HECC_MAX_TX_MBOX) - 1); + /* Prevent message over-write & Enable interrupts */ hecc_write(priv, HECC_CANOPC, HECC_SET_REG); if (priv->use_hecc1int) { @@ -511,8 +514,6 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) hecc_set_bit(priv, HECC_CANME, mbx_mask); spin_unlock_irqrestore(&priv->mbx_lock, flags); - hecc_clear_bit(priv, HECC_CANMD, mbx_mask); - hecc_set_bit(priv, HECC_CANMIM, mbx_mask); hecc_write(priv, HECC_CANTRS, mbx_mask); return NETDEV_TX_OK; @@ -676,7 +677,6 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) mbx_mask = BIT(mbxno); if (!(mbx_mask & hecc_read(priv, HECC_CANTA))) break; - hecc_clear_bit(priv, HECC_CANMIM, mbx_mask); hecc_write(priv, HECC_CANTA, mbx_mask); spin_lock_irqsave(&priv->mbx_lock, flags); hecc_clear_bit(priv, HECC_CANME, mbx_mask); -- cgit From 99383749c25954c23c87e1592f6b49b216e0a2e2 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:45:49 +0000 Subject: can: ti_hecc: release the mailbox a bit earlier Release the mailbox after reading it, so it can be reused a bit earlier. Since "can: rx-offload: continue on error" all pending message bits are cleared directly, so remove clearing them in ti_hecc. Suggested-by: Marc Kleine-Budde Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index d6a84f8ff863..6ea29126c60b 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -530,8 +530,9 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, u32 *timestamp, unsigned int mbxno) { struct ti_hecc_priv *priv = rx_offload_to_priv(offload); - u32 data; + u32 data, mbx_mask; + mbx_mask = BIT(mbxno); data = hecc_read_mbx(priv, mbxno, HECC_CANMID); if (data & HECC_CANMID_IDE) cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG; @@ -551,6 +552,7 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, } *timestamp = hecc_read_stamp(priv, mbxno); + hecc_write(priv, HECC_CANRMP, mbx_mask); return 1; } @@ -701,7 +703,6 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) while ((rx_pending = hecc_read(priv, HECC_CANRMP))) { can_rx_offload_irq_offload_timestamp(&priv->offload, rx_pending); - hecc_write(priv, HECC_CANRMP, rx_pending); } } -- cgit From 678d85ed8554e1d6c9720ebcab785eea8fe0d4ef Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:46:00 +0000 Subject: can: ti_hecc: add fifo overflow error reporting When the rx FIFO overflows the ti_hecc would silently drop them since the overwrite protection is enabled for all mailboxes. So disable it for the lowest priority mailbox and return a proper error value when receive message lost is set. Drop the message itself in that case, since it might be partially updated. Signed-off-by: Jeroen Hofstee Acked-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 6ea29126c60b..b12fd0bd489d 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -73,6 +73,7 @@ MODULE_VERSION(HECC_MODULE_VERSION); */ #define HECC_MAX_RX_MBOX (HECC_MAX_MAILBOXES - HECC_MAX_TX_MBOX) #define HECC_RX_FIRST_MBOX (HECC_MAX_MAILBOXES - 1) +#define HECC_RX_LAST_MBOX (HECC_MAX_TX_MBOX) /* TI HECC module registers */ #define HECC_CANME 0x0 /* Mailbox enable */ @@ -82,7 +83,7 @@ MODULE_VERSION(HECC_MODULE_VERSION); #define HECC_CANTA 0x10 /* Transmission acknowledge */ #define HECC_CANAA 0x14 /* Abort acknowledge */ #define HECC_CANRMP 0x18 /* Receive message pending */ -#define HECC_CANRML 0x1C /* Remote message lost */ +#define HECC_CANRML 0x1C /* Receive message lost */ #define HECC_CANRFP 0x20 /* Remote frame pending */ #define HECC_CANGAM 0x24 /* SECC only:Global acceptance mask */ #define HECC_CANMC 0x28 /* Master control */ @@ -385,8 +386,15 @@ static void ti_hecc_start(struct net_device *ndev) /* Enable tx interrupts */ hecc_set_bit(priv, HECC_CANMIM, BIT(HECC_MAX_TX_MBOX) - 1); - /* Prevent message over-write & Enable interrupts */ - hecc_write(priv, HECC_CANOPC, HECC_SET_REG); + /* Prevent message over-write to create a rx fifo, but not for + * the lowest priority mailbox, since that allows detecting + * overflows instead of the hardware silently dropping the + * messages. + */ + mbx_mask = ~BIT(HECC_RX_LAST_MBOX); + hecc_write(priv, HECC_CANOPC, mbx_mask); + + /* Enable interrupts */ if (priv->use_hecc1int) { hecc_write(priv, HECC_CANMIL, HECC_SET_REG); hecc_write(priv, HECC_CANGIM, HECC_CANGIM_DEF_MASK | @@ -531,6 +539,7 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, { struct ti_hecc_priv *priv = rx_offload_to_priv(offload); u32 data, mbx_mask; + int ret = 1; mbx_mask = BIT(mbxno); data = hecc_read_mbx(priv, mbxno, HECC_CANMID); @@ -552,9 +561,26 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload, } *timestamp = hecc_read_stamp(priv, mbxno); + + /* Check for FIFO overrun. + * + * All but the last RX mailbox have activated overwrite + * protection. So skip check for overrun, if we're not + * handling the last RX mailbox. + * + * As the overwrite protection for the last RX mailbox is + * disabled, the CAN core might update while we're reading + * it. This means the skb might be inconsistent. + * + * Return an error to let rx-offload discard this CAN frame. + */ + if (unlikely(mbxno == HECC_RX_LAST_MBOX && + hecc_read(priv, HECC_CANRML) & mbx_mask)) + ret = -ENOBUFS; + hecc_write(priv, HECC_CANRMP, mbx_mask); - return 1; + return ret; } static int ti_hecc_error(struct net_device *ndev, int int_status, @@ -884,7 +910,7 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->offload.mailbox_read = ti_hecc_mailbox_read; priv->offload.mb_first = HECC_RX_FIRST_MBOX; - priv->offload.mb_last = HECC_MAX_TX_MBOX; + priv->offload.mb_last = HECC_RX_LAST_MBOX; err = can_rx_offload_add_timestamp(ndev, &priv->offload); if (err) { dev_err(&pdev->dev, "can_rx_offload_add_timestamp() failed\n"); -- cgit From 3b2d652da21450aba19d299a75fe3a6f5d4003ff Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:46:03 +0000 Subject: can: ti_hecc: properly report state changes The HECC_CANES register handles the flags specially, it only updates the flags after a one is written to them. Since the interrupt for frame errors is not enabled an old error can hence been seen when a state interrupt arrives. For example if the device is not connected to the CAN-bus the error warning interrupt will have HECC_CANES indicating there is no ack. The error passive interrupt thereafter will have HECC_CANES flagging that there is a warning level. And if thereafter there is a message successfully send HECC_CANES points to an error passive event, while in reality it became error warning again. In summary, the state is not always reported correctly. So handle the state changes and frame errors separately. The state changes are now based on the interrupt flags and handled directly when they occur. The reporting of the frame errors is still done as before, as a side effect of another interrupt. note: the hecc_clear_bit will do a read, modify, write. So it will not only clear the bit, but also reset all other bits being set as a side affect, hence it is replaced with only clearing the flags. note: The HECC_CANMC_CCR is no longer cleared in the state change interrupt, it is completely unrelated. And use net_ratelimit to make checkpatch happy. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 162 +++++++++++++++++++++++++--------------------- 1 file changed, 88 insertions(+), 74 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index b12fd0bd489d..4c6d3ce0e8c4 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -150,6 +150,8 @@ MODULE_VERSION(HECC_MODULE_VERSION); #define HECC_BUS_ERROR (HECC_CANES_FE | HECC_CANES_BE |\ HECC_CANES_CRCE | HECC_CANES_SE |\ HECC_CANES_ACKE) +#define HECC_CANES_FLAGS (HECC_BUS_ERROR | HECC_CANES_BO |\ + HECC_CANES_EP | HECC_CANES_EW) #define HECC_CANMCF_RTR BIT(4) /* Remote transmit request */ @@ -592,91 +594,69 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, u32 timestamp; int err; - /* propagate the error condition to the can stack */ - skb = alloc_can_err_skb(ndev, &cf); - if (!skb) { - if (printk_ratelimit()) - netdev_err(priv->ndev, - "%s: alloc_can_err_skb() failed\n", - __func__); - return -ENOMEM; - } - - if (int_status & HECC_CANGIF_WLIF) { /* warning level int */ - if ((int_status & HECC_CANGIF_BOIF) == 0) { - priv->can.state = CAN_STATE_ERROR_WARNING; - ++priv->can.can_stats.error_warning; - cf->can_id |= CAN_ERR_CRTL; - if (hecc_read(priv, HECC_CANTEC) > 96) - cf->data[1] |= CAN_ERR_CRTL_TX_WARNING; - if (hecc_read(priv, HECC_CANREC) > 96) - cf->data[1] |= CAN_ERR_CRTL_RX_WARNING; - } - hecc_set_bit(priv, HECC_CANES, HECC_CANES_EW); - netdev_dbg(priv->ndev, "Error Warning interrupt\n"); - hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); - } - - if (int_status & HECC_CANGIF_EPIF) { /* error passive int */ - if ((int_status & HECC_CANGIF_BOIF) == 0) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - ++priv->can.can_stats.error_passive; - cf->can_id |= CAN_ERR_CRTL; - if (hecc_read(priv, HECC_CANTEC) > 127) - cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE; - if (hecc_read(priv, HECC_CANREC) > 127) - cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE; + if (err_status & HECC_BUS_ERROR) { + /* propagate the error condition to the can stack */ + skb = alloc_can_err_skb(ndev, &cf); + if (!skb) { + if (net_ratelimit()) + netdev_err(priv->ndev, + "%s: alloc_can_err_skb() failed\n", + __func__); + return -ENOMEM; } - hecc_set_bit(priv, HECC_CANES, HECC_CANES_EP); - netdev_dbg(priv->ndev, "Error passive interrupt\n"); - hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); - } - - /* Need to check busoff condition in error status register too to - * ensure warning interrupts don't hog the system - */ - if ((int_status & HECC_CANGIF_BOIF) || (err_status & HECC_CANES_BO)) { - priv->can.state = CAN_STATE_BUS_OFF; - cf->can_id |= CAN_ERR_BUSOFF; - hecc_set_bit(priv, HECC_CANES, HECC_CANES_BO); - hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR); - /* Disable all interrupts in bus-off to avoid int hog */ - hecc_write(priv, HECC_CANGIM, 0); - ++priv->can.can_stats.bus_off; - can_bus_off(ndev); - } - if (err_status & HECC_BUS_ERROR) { ++priv->can.can_stats.bus_error; cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; - if (err_status & HECC_CANES_FE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE); + if (err_status & HECC_CANES_FE) cf->data[2] |= CAN_ERR_PROT_FORM; - } - if (err_status & HECC_CANES_BE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_BE); + if (err_status & HECC_CANES_BE) cf->data[2] |= CAN_ERR_PROT_BIT; - } - if (err_status & HECC_CANES_SE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_SE); + if (err_status & HECC_CANES_SE) cf->data[2] |= CAN_ERR_PROT_STUFF; - } - if (err_status & HECC_CANES_CRCE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE); + if (err_status & HECC_CANES_CRCE) cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; - } - if (err_status & HECC_CANES_ACKE) { - hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE); + if (err_status & HECC_CANES_ACKE) cf->data[3] = CAN_ERR_PROT_LOC_ACK; - } + + timestamp = hecc_read(priv, HECC_CANLNT); + err = can_rx_offload_queue_sorted(&priv->offload, skb, + timestamp); + if (err) + ndev->stats.rx_fifo_errors++; + } + + hecc_write(priv, HECC_CANES, HECC_CANES_FLAGS); + + return 0; +} + +static void ti_hecc_change_state(struct net_device *ndev, + enum can_state rx_state, + enum can_state tx_state) +{ + struct ti_hecc_priv *priv = netdev_priv(ndev); + struct can_frame *cf; + struct sk_buff *skb; + u32 timestamp; + int err; + + skb = alloc_can_err_skb(priv->ndev, &cf); + if (unlikely(!skb)) { + priv->can.state = max(tx_state, rx_state); + return; + } + + can_change_state(priv->ndev, cf, tx_state, rx_state); + + if (max(tx_state, rx_state) != CAN_STATE_BUS_OFF) { + cf->data[6] = hecc_read(priv, HECC_CANTEC); + cf->data[7] = hecc_read(priv, HECC_CANREC); } timestamp = hecc_read(priv, HECC_CANLNT); err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); if (err) ndev->stats.rx_fifo_errors++; - - return 0; } static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) @@ -686,6 +666,7 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) struct net_device_stats *stats = &ndev->stats; u32 mbxno, mbx_mask, int_status, err_status, stamp; unsigned long flags, rx_pending; + u32 handled = 0; int_status = hecc_read(priv, priv->use_hecc1int ? @@ -695,10 +676,43 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) return IRQ_NONE; err_status = hecc_read(priv, HECC_CANES); - if (err_status & (HECC_BUS_ERROR | HECC_CANES_BO | - HECC_CANES_EP | HECC_CANES_EW)) + if (unlikely(err_status & HECC_CANES_FLAGS)) ti_hecc_error(ndev, int_status, err_status); + if (unlikely(int_status & HECC_CANGIM_DEF_MASK)) { + enum can_state rx_state, tx_state; + u32 rec = hecc_read(priv, HECC_CANREC); + u32 tec = hecc_read(priv, HECC_CANTEC); + + if (int_status & HECC_CANGIF_WLIF) { + handled |= HECC_CANGIF_WLIF; + rx_state = rec >= tec ? CAN_STATE_ERROR_WARNING : 0; + tx_state = rec <= tec ? CAN_STATE_ERROR_WARNING : 0; + netdev_dbg(priv->ndev, "Error Warning interrupt\n"); + ti_hecc_change_state(ndev, rx_state, tx_state); + } + + if (int_status & HECC_CANGIF_EPIF) { + handled |= HECC_CANGIF_EPIF; + rx_state = rec >= tec ? CAN_STATE_ERROR_PASSIVE : 0; + tx_state = rec <= tec ? CAN_STATE_ERROR_PASSIVE : 0; + netdev_dbg(priv->ndev, "Error passive interrupt\n"); + ti_hecc_change_state(ndev, rx_state, tx_state); + } + + if (int_status & HECC_CANGIF_BOIF) { + handled |= HECC_CANGIF_BOIF; + rx_state = CAN_STATE_BUS_OFF; + tx_state = CAN_STATE_BUS_OFF; + netdev_dbg(priv->ndev, "Bus off interrupt\n"); + + /* Disable all interrupts */ + hecc_write(priv, HECC_CANGIM, 0); + can_bus_off(ndev); + ti_hecc_change_state(ndev, rx_state, tx_state); + } + } + if (int_status & HECC_CANGIF_GMIF) { while (priv->tx_tail - priv->tx_head > 0) { mbxno = get_tx_tail_mb(priv); @@ -734,10 +748,10 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) /* clear all interrupt conditions - read back to avoid spurious ints */ if (priv->use_hecc1int) { - hecc_write(priv, HECC_CANGIF1, HECC_SET_REG); + hecc_write(priv, HECC_CANGIF1, handled); int_status = hecc_read(priv, HECC_CANGIF1); } else { - hecc_write(priv, HECC_CANGIF0, HECC_SET_REG); + hecc_write(priv, HECC_CANGIF0, handled); int_status = hecc_read(priv, HECC_CANGIF0); } -- cgit From b5018be6d5dd9dd257bf8236298daac8b1262750 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Tue, 24 Sep 2019 18:46:06 +0000 Subject: can: ti_hecc: add missing state changes While the ti_hecc has interrupts to report when the error counters increase to a certain level and which change state it doesn't handle the case that the error counters go down again, so the reported state can actually be wrong. Since there is no interrupt for that, do update state based on the error counters, when the state is not error active and goes down again. Signed-off-by: Jeroen Hofstee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/ti_hecc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 4c6d3ce0e8c4..31ad364a89bb 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -711,6 +711,23 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) can_bus_off(ndev); ti_hecc_change_state(ndev, rx_state, tx_state); } + } else if (unlikely(priv->can.state != CAN_STATE_ERROR_ACTIVE)) { + enum can_state new_state, tx_state, rx_state; + u32 rec = hecc_read(priv, HECC_CANREC); + u32 tec = hecc_read(priv, HECC_CANTEC); + + if (rec >= 128 || tec >= 128) + new_state = CAN_STATE_ERROR_PASSIVE; + else if (rec >= 96 || tec >= 96) + new_state = CAN_STATE_ERROR_WARNING; + else + new_state = CAN_STATE_ERROR_ACTIVE; + + if (new_state < priv->can.state) { + rx_state = rec >= tec ? new_state : 0; + tx_state = rec <= tec ? new_state : 0; + ti_hecc_change_state(ndev, rx_state, tx_state); + } } if (int_status & HECC_CANGIF_GMIF) { -- cgit From db1a804cca6fe0cea9dea888d50dda134713c340 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 18 Sep 2019 11:11:56 +0100 Subject: can: j1939: fix resource leak of skb on error return paths Currently the error return paths do not free skb and this results in a memory leak. Fix this by freeing them before the return. Addresses-Coverity: ("Resource leak") Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Colin Ian King Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 37c1040bcb9c..5c6eabcb5df1 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -909,8 +909,10 @@ void j1939_sk_errqueue(struct j1939_session *session, memset(serr, 0, sizeof(*serr)); switch (type) { case J1939_ERRQUEUE_ACK: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) { + kfree_skb(skb); return; + } serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; @@ -918,8 +920,10 @@ void j1939_sk_errqueue(struct j1939_session *session, state = "ACK"; break; case J1939_ERRQUEUE_SCHED: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) { + kfree_skb(skb); return; + } serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; -- cgit From 896daf723c845289a4ea1e68e74a5a5475aa796d Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 10 Oct 2019 12:50:31 +0200 Subject: can: j1939: fix memory leak if filters was set Filters array is coped from user space and linked to the j1939 socket. On socket release this memory was not freed. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 5c6eabcb5df1..4d8ba701e15d 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -580,6 +580,7 @@ static int j1939_sk_release(struct socket *sock) j1939_netdev_stop(priv); } + kfree(jsk->filters); sock_orphan(sk); sock->sk = NULL; -- cgit From eaa654f164ba9acd5656e6485eeb5e73da8bfc3e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 25 Oct 2019 15:04:13 +0200 Subject: can: j1939: transport: j1939_session_fresh_new(): make sure EOMA is send with the total message size set We were sending malformed EOMA messageswith total message size set to 0. This patch fixes the bug. Reported-by: https://github.com/linux-can/can-utils/issues/159 Signed-off-by: Oleksij Rempel Acked-by: Kurt Van Dijck Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fe000ea757ea..06183d6f4fb7 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1432,7 +1432,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv, skcb = j1939_skb_to_cb(skb); memcpy(skcb, rel_skcb, sizeof(*skcb)); - session = j1939_session_new(priv, skb, skb->len); + session = j1939_session_new(priv, skb, size); if (!session) { kfree_skb(skb); return NULL; -- cgit From 688d11c38423fd98ab6c8d5fc1976c8f365fc875 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 25 Oct 2019 15:04:13 +0200 Subject: can: j1939: transport: j1939_xtp_rx_eoma_one(): Add sanity check for correct total message size We were sending malformed EOMA with total message size set to 0. This issue has been fixed in the previous patch. In this patch a sanity check is added to the RX path and a error message is displayed. Signed-off-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 06183d6f4fb7..e5f1a56994c6 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1273,9 +1273,27 @@ j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb, static void j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb) { + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + const u8 *dat; + int len; + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) return; + dat = skb->data; + + if (skcb->addr.type == J1939_ETP) + len = j1939_etp_ctl_to_size(dat); + else + len = j1939_tp_ctl_to_size(dat); + + if (session->total_message_size != len) { + netdev_warn_once(session->priv->ndev, + "%s: 0x%p: Incorrect size. Expected: %i; got: %i.\n", + __func__, session, session->total_message_size, + len); + } + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); session->pkt.tx_acked = session->pkt.total; -- cgit From 27a0e54bae09d2dd023a01254db506d61cc50ba1 Mon Sep 17 00:00:00 2001 From: Timo Schlüßler Date: Fri, 11 Oct 2019 15:38:19 +0200 Subject: can: mcp251x: mcp251x_restart_work_handler(): Fix potential force_quit race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mcp251x_restart_work_handler() the variable to stop the interrupt handler (priv->force_quit) is reset after the chip is restarted and thus a interrupt might occur. This patch fixes the potential race condition by resetting force_quit before enabling interrupts. Signed-off-by: Timo Schlüßler Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index bee9f7b8dad6..bb20a9b75cc6 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -717,6 +717,7 @@ static void mcp251x_restart_work_handler(struct work_struct *ws) if (priv->after_suspend) { mcp251x_hw_reset(spi); mcp251x_setup(net, spi); + priv->force_quit = 0; if (priv->after_suspend & AFTER_SUSPEND_RESTART) { mcp251x_set_normal_mode(spi); } else if (priv->after_suspend & AFTER_SUSPEND_UP) { @@ -728,7 +729,6 @@ static void mcp251x_restart_work_handler(struct work_struct *ws) mcp251x_hw_sleep(spi); } priv->after_suspend = 0; - priv->force_quit = 0; } if (priv->restart_tx) { -- cgit From 3926a3a025d443f6b7a58a2c0c33e7d77c1ca935 Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Thu, 19 Sep 2019 15:53:04 +0200 Subject: can: don't use deprecated license identifiers The "GPL-2.0" license identifier changed to "GPL-2.0-only" in SPDX v3.0. Signed-off-by: Yegor Yefremov Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 2 +- include/uapi/linux/can/bcm.h | 2 +- include/uapi/linux/can/error.h | 2 +- include/uapi/linux/can/gw.h | 2 +- include/uapi/linux/can/j1939.h | 2 +- include/uapi/linux/can/netlink.h | 2 +- include/uapi/linux/can/raw.h | 2 +- include/uapi/linux/can/vxcan.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 1e988fdeba34..6a6d2c7655ff 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can.h * diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index 0fb328d93148..dd2b925b09ac 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/bcm.h * diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index bfc4b5d22a5e..34633283de64 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/error.h * diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 3aea5388c8e4..c2190bbe21d8 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/gw.h * diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index c32325342d30..df6e821075c1 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* * j1939.h * diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 1bc70d3a4d39..6f598b73839e 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* * linux/can/netlink.h * diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index be3b36e7ff61..6a11d308eb5c 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * linux/can/raw.h * diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h index 066812d118a2..4fa9d8777a07 100644 --- a/include/uapi/linux/can/vxcan.h +++ b/include/uapi/linux/can/vxcan.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ #ifndef _UAPI_CAN_VXCAN_H #define _UAPI_CAN_VXCAN_H -- cgit From 0763b3e81a82477363d72548604455bf9468c2fa Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Sat, 2 Nov 2019 01:28:28 +0200 Subject: taprio: fix panic while hw offload sched list swap Don't swap oper and admin schedules too early, it's not correct and causes crash. Steps to reproduce: 1) tc qdisc replace dev eth0 parent root handle 100 taprio \ num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \ queues 1@0 1@1 1@2 \ base-time $SOME_BASE_TIME \ sched-entry S 01 80000 \ sched-entry S 02 15000 \ sched-entry S 04 40000 \ flags 2 2) tc qdisc replace dev eth0 parent root handle 100 taprio \ base-time $SOME_BASE_TIME \ sched-entry S 01 90000 \ sched-entry S 02 20000 \ sched-entry S 04 40000 \ flags 2 3) tc qdisc replace dev eth0 parent root handle 100 taprio \ base-time $SOME_BASE_TIME \ sched-entry S 01 150000 \ sched-entry S 02 200000 \ sched-entry S 04 40000 \ flags 2 Do 2 3 2 .. steps more times if not happens and observe: [ 305.832319] Unable to handle kernel write to read-only memory at virtual address ffff0000087ce7f0 [ 305.910887] CPU: 0 PID: 0 Comm: swapper/0 Not tainted [ 305.919306] Hardware name: Texas Instruments AM654 Base Board (DT) [...] [ 306.017119] x1 : ffff800848031d88 x0 : ffff800848031d80 [ 306.022422] Call trace: [ 306.024866] taprio_free_sched_cb+0x4c/0x98 [ 306.029040] rcu_process_callbacks+0x25c/0x410 [ 306.033476] __do_softirq+0x10c/0x208 [ 306.037132] irq_exit+0xb8/0xc8 [ 306.040267] __handle_domain_irq+0x64/0xb8 [ 306.044352] gic_handle_irq+0x7c/0x178 [ 306.048092] el1_irq+0xb0/0x128 [ 306.051227] arch_cpu_idle+0x10/0x18 [ 306.054795] do_idle+0x120/0x138 [ 306.058015] cpu_startup_entry+0x20/0x28 [ 306.061931] rest_init+0xcc/0xd8 [ 306.065154] start_kernel+0x3bc/0x3e4 [ 306.068810] Code: f2fbd5b7 f2fbd5b6 d503201f f9400422 (f9000662) [ 306.074900] ---[ end trace 96c8e2284a9d9d6e ]--- [ 306.079507] Kernel panic - not syncing: Fatal exception in interrupt [ 306.085847] SMP: stopping secondary CPUs [ 306.089765] Kernel Offset: disabled Try to explain one of the possible crash cases: The "real" admin list is assigned when admin_sched is set to new_admin, it happens after "swap", that assigns to oper_sched NULL. Thus if call qdisc show it can crash. Farther, next second time, when sched list is updated, the admin_sched is not NULL and becomes the oper_sched, previous oper_sched was NULL so just skipped. But then admin_sched is assigned new_admin, but schedules to free previous assigned admin_sched (that already became oper_sched). Farther, next third time, when sched list is updated, while one more swap, oper_sched is not null, but it was happy to be freed already (while prev. admin update), so while try to free oper_sched the kernel panic happens at taprio_free_sched_cb(). So, move the "swap emulation" where it should be according to function comment from code. Fixes: 9c66d15646760e ("taprio: Add support for hardware offloading") Signed-off-by: Ivan Khoronzhuk Acked-by: Vinicius Costa Gomes Tested-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 2121187229cd..7cd68628c637 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1224,8 +1224,6 @@ static int taprio_enable_offload(struct net_device *dev, goto done; } - taprio_offload_config_changed(q); - done: taprio_offload_free(offload); @@ -1505,6 +1503,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, call_rcu(&admin->rcu, taprio_free_sched_cb); spin_unlock_irqrestore(&q->current_entry_lock, flags); + + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + taprio_offload_config_changed(q); } new_admin = NULL; -- cgit From 1899bb325149e481de31a4f32b59ea6f24e176ea Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 1 Nov 2019 21:56:42 -0700 Subject: bonding: fix state transition issue in link monitoring Since de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring"), the bonding driver has utilized two separate variables to indicate the next link state a particular slave should transition to. Each is used to communicate to a different portion of the link state change commit logic; one to the bond_miimon_commit function itself, and another to the state transition logic. Unfortunately, the two variables can become unsynchronized, resulting in incorrect link state transitions within bonding. This can cause slaves to become stuck in an incorrect link state until a subsequent carrier state transition. The issue occurs when a special case in bond_slave_netdev_event sets slave->link directly to BOND_LINK_FAIL. On the next pass through bond_miimon_inspect after the slave goes carrier up, the BOND_LINK_FAIL case will set the proposed next state (link_new_state) to BOND_LINK_UP, but the new_link to BOND_LINK_DOWN. The setting of the final link state from new_link comes after that from link_new_state, and so the slave will end up incorrectly in _DOWN state. Resolve this by combining the two variables into one. Reported-by: Aleksei Zakharov Reported-by: Sha Zhang Cc: Mahesh Bandewar Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring") Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 44 ++++++++++++++++++++--------------------- include/net/bonding.h | 3 +-- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 480f9459b402..62f65573eb04 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2083,8 +2083,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !rcu_dereference(bond->curr_active_slave); bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; - slave->link_new_state = slave->link; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2118,7 +2117,7 @@ static int bond_miimon_inspect(struct bonding *bond) } if (slave->delay <= 0) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; continue; } @@ -2155,7 +2154,7 @@ static int bond_miimon_inspect(struct bonding *bond) slave->delay = 0; if (slave->delay <= 0) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; ignore_updelay = false; continue; @@ -2193,7 +2192,7 @@ static void bond_miimon_commit(struct bonding *bond) struct slave *slave, *primary; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: /* For 802.3ad mode, check current slave speed and * duplex again in case its port was disabled after @@ -2265,8 +2264,8 @@ static void bond_miimon_commit(struct bonding *bond) default: slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", - slave->new_link); - slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state); + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); continue; } @@ -2674,13 +2673,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2705,7 +2704,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2736,8 +2735,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) goto re_arm; bond_for_each_slave(bond, slave, iter) { - if (slave->new_link != BOND_LINK_NOCHANGE) - slave->link = slave->new_link; + if (slave->link_new_state != BOND_LINK_NOCHANGE) + slave->link = slave->link_new_state; } if (slave_state_changed) { @@ -2760,9 +2759,9 @@ re_arm: } /* Called to inspect slaves for active-backup mode ARP monitor link state - * changes. Sets new_link in slaves to specify what action should take - * place for the slave. Returns 0 if no changes are found, >0 if changes - * to link states must be committed. + * changes. Sets proposed link state in slaves to specify what action + * should take place for the slave. Returns 0 if no changes are found, >0 + * if changes to link states must be committed. * * Called with rcu_read_lock held. */ @@ -2774,12 +2773,12 @@ static int bond_ab_arp_inspect(struct bonding *bond) int commit = 0; bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); last_rx = slave_last_rx(bond, slave); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; } continue; @@ -2807,7 +2806,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (!bond_is_active_slave(slave) && !rcu_access_pointer(bond->current_arp_slave) && !bond_time_in_interval(bond, last_rx, 3)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } @@ -2820,7 +2819,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_is_active_slave(slave) && (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, last_rx, 2))) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } } @@ -2840,7 +2839,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *slave; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: continue; @@ -2890,8 +2889,9 @@ static void bond_ab_arp_commit(struct bonding *bond) continue; default: - slave_err(bond->dev, slave->dev, "impossible: new_link %d on slave\n", - slave->new_link); + slave_err(bond->dev, slave->dev, + "impossible: link_new_state %d on slave\n", + slave->link_new_state); continue; } diff --git a/include/net/bonding.h b/include/net/bonding.h index 1afc125014da..3d56b026bb9e 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -159,7 +159,6 @@ struct slave { unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ - s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ @@ -549,7 +548,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state) static inline void bond_commit_link_state(struct slave *slave, bool notify) { - if (slave->link == slave->link_new_state) + if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; -- cgit From 2ef17216d732f40dcd96423384064d542e3ff658 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 2 Nov 2019 17:14:42 +0530 Subject: net: hns3: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Hisilicon network devices. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 75ccc1e7076b..a0998937727d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HNAE3_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 2110fa3b4479..5d468ed404a6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HNS3_ENET_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 4821fe08b5e4..1426eb5ddf3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_CMD_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h index 278f21e02736..b04702e65689 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_DCB_H__ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index c3d56b872ed7..59b824347ba4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_MAIN_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index ef095d9c566f..dd9a1218a7b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_MDIO_H diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 818610988d34..260f22d19d81 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright (c) 2016-2017 Hisilicon Limited. #ifndef __HCLGE_TM_H -- cgit From 59eb87cb52c9f7164804bc8639c4d03ba9b0c169 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Sat, 2 Nov 2019 14:17:47 +0000 Subject: net: sched: prevent duplicate flower rules from tcf_proto destroy race When a new filter is added to cls_api, the function tcf_chain_tp_insert_unique() looks up the protocol/priority/chain to determine if the tcf_proto is duplicated in the chain's hashtable. It then creates a new entry or continues with an existing one. In cls_flower, this allows the function fl_ht_insert_unque to determine if a filter is a duplicate and reject appropriately, meaning that the duplicate will not be passed to drivers via the offload hooks. However, when a tcf_proto is destroyed it is removed from its chain before a hardware remove hook is hit. This can lead to a race whereby the driver has not received the remove message but duplicate flows can be accepted. This, in turn, can lead to the offload driver receiving incorrect duplicate flows and out of order add/delete messages. Prevent duplicates by utilising an approach suggested by Vlad Buslov. A hash table per block stores each unique chain/protocol/prio being destroyed. This entry is only removed when the full destroy (and hardware offload) has completed. If a new flow is being added with the same identiers as a tc_proto being detroyed, then the add request is replayed until the destroy is complete. Fixes: 8b64678e0af8 ("net: sched: refactor tp insert/delete for concurrent execution") Signed-off-by: John Hurley Signed-off-by: Vlad Buslov Reviewed-by: Simon Horman Reported-by: Louis Peens Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 +++ net/sched/cls_api.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 637548d54b3e..d80acda231ae 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -362,6 +363,7 @@ struct tcf_proto { bool deleting; refcount_t refcnt; struct rcu_head rcu; + struct hlist_node destroy_ht_node; }; struct qdisc_skb_cb { @@ -414,6 +416,8 @@ struct tcf_block { struct list_head filter_chain_list; } chain0; struct rcu_head rcu; + DECLARE_HASHTABLE(proto_destroy_ht, 7); + struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; #ifdef CONFIG_PROVE_LOCKING diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8717c0b26c90..20d60b8fcb70 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,62 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); +static u32 destroy_obj_hashfn(const struct tcf_proto *tp) +{ + return jhash_3words(tp->chain->index, tp->prio, + (__force __u32)tp->protocol, 0); +} + +static void tcf_proto_signal_destroying(struct tcf_chain *chain, + struct tcf_proto *tp) +{ + struct tcf_block *block = chain->block; + + mutex_lock(&block->proto_destroy_lock); + hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node, + destroy_obj_hashfn(tp)); + mutex_unlock(&block->proto_destroy_lock); +} + +static bool tcf_proto_cmp(const struct tcf_proto *tp1, + const struct tcf_proto *tp2) +{ + return tp1->chain->index == tp2->chain->index && + tp1->prio == tp2->prio && + tp1->protocol == tp2->protocol; +} + +static bool tcf_proto_exists_destroying(struct tcf_chain *chain, + struct tcf_proto *tp) +{ + u32 hash = destroy_obj_hashfn(tp); + struct tcf_proto *iter; + bool found = false; + + rcu_read_lock(); + hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter, + destroy_ht_node, hash) { + if (tcf_proto_cmp(tp, iter)) { + found = true; + break; + } + } + rcu_read_unlock(); + + return found; +} + +static void +tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp) +{ + struct tcf_block *block = chain->block; + + mutex_lock(&block->proto_destroy_lock); + if (hash_hashed(&tp->destroy_ht_node)) + hash_del_rcu(&tp->destroy_ht_node); + mutex_unlock(&block->proto_destroy_lock); +} + /* Find classifier type by string name */ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) @@ -234,9 +291,11 @@ static void tcf_proto_get(struct tcf_proto *tp) static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, - struct netlink_ext_ack *extack) + bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); + if (sig_destroy) + tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); @@ -246,7 +305,7 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { if (refcount_dec_and_test(&tp->refcnt)) - tcf_proto_destroy(tp, rtnl_held, extack); + tcf_proto_destroy(tp, rtnl_held, true, extack); } static int walker_check_empty(struct tcf_proto *tp, void *fh, @@ -370,6 +429,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain) static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); + mutex_destroy(&block->proto_destroy_lock); kfree_rcu(block, rcu); } @@ -545,6 +605,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_dereference(chain->filter_chain, chain); + while (tp) { + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_signal_destroying(chain, tp); + tp = tp_next; + } + tp = tcf_chain_dereference(chain->filter_chain, chain); RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); chain->flushing = true; @@ -844,6 +910,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + mutex_init(&block->proto_destroy_lock); init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); @@ -1621,6 +1688,12 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_lock(&chain->filter_chain_lock); + if (tcf_proto_exists_destroying(chain, tp_new)) { + mutex_unlock(&chain->filter_chain_lock); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); + return ERR_PTR(-EAGAIN); + } + tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp) @@ -1628,10 +1701,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_unlock(&chain->filter_chain_lock); if (tp) { - tcf_proto_destroy(tp_new, rtnl_held, NULL); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = tp; } else if (err) { - tcf_proto_destroy(tp_new, rtnl_held, NULL); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = ERR_PTR(err); } @@ -1669,6 +1742,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, return; } + tcf_proto_signal_destroying(chain, tp); next = tcf_chain_dereference(chain_info.next, chain); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); @@ -2188,6 +2262,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -EINVAL; goto errout_locked; } else if (t->tcm_handle == 0) { + tcf_proto_signal_destroying(chain, tp); tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); -- cgit From e684000b8a2b1e14b9f8ebd72dfd998d44a864ca Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 2 Nov 2019 20:17:39 -0700 Subject: net: dsa: bcm_sf2: Fix driver removal With the DSA core doing the call to dsa_port_disable() we do not need to do that within the driver itself. This could cause an use after free since past dsa_unregister_switch() we should not be accessing any dsa_switch internal structures. Fixes: 0394a63acfe2 ("net: dsa: enable and disable all ports") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d44651ad520c..69fc13046ac7 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1215,10 +1215,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); priv->wol_ports_mask = 0; + /* Disable interrupts */ + bcm_sf2_intr_disable(priv); dsa_unregister_switch(priv->dev->ds); bcm_sf2_cfp_exit(priv->dev->ds); - /* Disable all ports and interrupts */ - bcm_sf2_sw_suspend(priv->dev->ds); bcm_sf2_mdio_unregister(priv); return 0; -- cgit From 57d0f00dfeb3775eae88af1c4aeda6bd35943f20 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Sun, 3 Nov 2019 11:11:35 +0200 Subject: mlx4_core: fix wrong comment about the reason of subtract one from the max_cqes The reason for the pre-allocation of one CQE is to enable resizing of the CQ. Fix comment accordingly. Signed-off-by: Dotan Barak Signed-off-by: Eli Cohen Signed-off-by: Vladimir Sokolovsky Signed-off-by: Yuval Shaia Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index fce9b3a24347..69bb6bb06e76 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -514,8 +514,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a - * spare CQE so the HCA HW can tell the difference between an - * empty CQ and a full CQ. + * spare CQE to enable resizing the CQ. */ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; -- cgit From 683916f6a84023407761d843048f1aea486b2612 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Nov 2019 15:36:57 -0800 Subject: net/tls: fix sk_msg trim on fallback to copy mode sk_msg_trim() tries to only update curr pointer if it falls into the trimmed region. The logic, however, does not take into the account pointer wrapping that sk_msg_iter_var_prev() does nor (as John points out) the fact that msg->sg is a ring buffer. This means that when the message was trimmed completely, the new curr pointer would have the value of MAX_MSG_FRAGS - 1, which is neither smaller than any other value, nor would it actually be correct. Special case the trimming to 0 length a little bit and rework the comparison between curr and end to take into account wrapping. This bug caused the TLS code to not copy all of the message, if zero copy filled in fewer sg entries than memcopy would need. Big thanks to Alexander Potapenko for the non-KMSAN reproducer. v2: - take into account that msg->sg is a ring buffer (John). Link: https://lore.kernel.org/netdev/20191030160542.30295-1-jakub.kicinski@netronome.com/ (v1) Fixes: d829e9c4112b ("tls: convert to generic sk_msg interface") Reported-by: syzbot+f8495bff23a879a6d0bd@syzkaller.appspotmail.com Reported-by: syzbot+6f50c99e8f6194bf363f@syzkaller.appspotmail.com Co-developed-by: John Fastabend Signed-off-by: Jakub Kicinski Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/skmsg.h | 9 ++++++--- net/core/skmsg.c | 20 +++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e4b3fb4bb77c..ce7055259877 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -139,6 +139,11 @@ static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) } } +static inline u32 sk_msg_iter_dist(u32 start, u32 end) +{ + return end >= start ? end - start : end + (MAX_MSG_FRAGS - start); +} + #define sk_msg_iter_var_prev(var) \ do { \ if (var == 0) \ @@ -198,9 +203,7 @@ static inline u32 sk_msg_elem_used(const struct sk_msg *msg) if (sk_msg_full(msg)) return MAX_MSG_FRAGS; - return msg->sg.end >= msg->sg.start ? - msg->sg.end - msg->sg.start : - msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); + return sk_msg_iter_dist(msg->sg.start, msg->sg.end); } static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cf390e0aa73d..ad31e4e53d0a 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -270,18 +270,28 @@ void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len) msg->sg.data[i].length -= trim; sk_mem_uncharge(sk, trim); + /* Adjust copybreak if it falls into the trimmed part of last buf */ + if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length) + msg->sg.copybreak = msg->sg.data[i].length; out: - /* If we trim data before curr pointer update copybreak and current - * so that any future copy operations start at new copy location. + sk_msg_iter_var_next(i); + msg->sg.end = i; + + /* If we trim data a full sg elem before curr pointer update + * copybreak and current so that any future copy operations + * start at new copy location. * However trimed data that has not yet been used in a copy op * does not require an update. */ - if (msg->sg.curr >= i) { + if (!msg->sg.size) { + msg->sg.curr = msg->sg.start; + msg->sg.copybreak = 0; + } else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >= + sk_msg_iter_dist(msg->sg.start, msg->sg.end)) { + sk_msg_iter_var_prev(i); msg->sg.curr = i; msg->sg.copybreak = msg->sg.data[i].length; } - sk_msg_iter_var_next(i); - msg->sg.end = i; } EXPORT_SYMBOL_GPL(sk_msg_trim); -- cgit From e7a86c687e64ab24f88330ad24ecc9442ce40c5a Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Mon, 4 Nov 2019 17:54:22 -0700 Subject: net: qualcomm: rmnet: Fix potential UAF when unregistering During the exit/unregistration process of the RmNet driver, the function rmnet_unregister_real_device() is called to handle freeing the driver's internal state and removing the RX handler on the underlying physical device. However, the order of operations this function performs is wrong and can lead to a use after free of the rmnet_port structure. Before calling netdev_rx_handler_unregister(), this port structure is freed with kfree(). If packets are received on any RmNet devices before synchronize_net() completes, they will attempt to use this already-freed port structure when processing the packet. As such, before cleaning up any other internal state, the RX handler must be unregistered in order to guarantee that no further packets will arrive on the device. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 9c54b715228e..06de59521fc4 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -57,10 +57,10 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, if (port->nr_rmnet_devs) return -EINVAL; - kfree(port); - netdev_rx_handler_unregister(real_dev); + kfree(port); + /* release reference on real_dev */ dev_put(real_dev); -- cgit From f75359f3ac855940c5718af10ba089b8977bf339 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Nov 2019 21:38:43 -0800 Subject: net: prevent load/store tearing on sk->sk_stamp Add a couple of READ_ONCE() and WRITE_ONCE() to prevent load-tearing and store-tearing in sock_read_timestamp() and sock_write_timestamp() This might prevent another KCSAN report. Fixes: 3a0ed3e96197 ("sock: Make sock->sk_stamp thread-safe") Signed-off-by: Eric Dumazet Cc: Deepa Dinamani Acked-by: Deepa Dinamani Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8f9adcfac41b..718e62fbe869 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2342,7 +2342,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk) return kt; #else - return sk->sk_stamp; + return READ_ONCE(sk->sk_stamp); #endif } @@ -2353,7 +2353,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else - sk->sk_stamp = kt; + WRITE_ONCE(sk->sk_stamp, kt); #endif } -- cgit From 517ce4e93368938b204451285e53014549804868 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 5 Nov 2019 16:34:07 +0800 Subject: NFC: fdp: fix incorrect free object The address of fw_vsc_cfg is on stack. Releasing it with devm_kfree() is incorrect, which may result in a system crash or other security impacts. The expected object to free is *fw_vsc_cfg. Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/nfc/fdp/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index 1cd113c8d7cb..ad0abb1f0bae 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -259,7 +259,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, *fw_vsc_cfg, len); if (r) { - devm_kfree(dev, fw_vsc_cfg); + devm_kfree(dev, *fw_vsc_cfg); goto vsc_read_err; } } else { -- cgit From 2836654a2735d3bc0479edd3ca7457d909b007ed Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 5 Nov 2019 14:13:48 +0200 Subject: Documentation: TLS: Add missing counter description Add TLS TX counter description for the handshake retransmitted packets that triggers the resync procedure then skip it, going into the regular transmit flow. Fixes: 46a3ea98074e ("net/mlx5e: kTLS, Enhance TX resync flow") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 0dd3f748239f..f914e81fd3a6 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -436,6 +436,10 @@ by the driver: encryption. * ``tx_tls_ooo`` - number of TX packets which were part of a TLS stream but did not arrive in the expected order. + * ``tx_tls_skip_no_sync_data`` - number of TX packets which were part of + a TLS stream and arrived out-of-order, but skipped the HW offload routine + and went to the regular transmit flow as they were retransmissions of the + connection handshake. * ``tx_tls_drop_no_sync_data`` - number of TX packets which were part of a TLS stream dropped, because they arrived out of order and associated record could not be found. -- cgit From 3a55402c93877d291b0a612d25edb03d1b4b93ac Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:24 -0800 Subject: net: bcmgenet: use RGMII loopback for MAC reset As noted in commit 28c2d1a7a0bf ("net: bcmgenet: enable loopback during UniMAC sw_reset") the UniMAC must be clocked while sw_reset is asserted for its state machines to reset cleanly. The transmit and receive clocks used by the UniMAC are derived from the signals used on its PHY interface. The bcmgenet MAC can be configured to work with different PHY interfaces including MII, GMII, RGMII, and Reverse MII on internal and external interfaces. Unfortunately for the UniMAC, when configured for MII the Tx clock is always driven from the PHY which places it outside of the direct control of the MAC. The earlier commit enabled a local loopback mode within the UniMAC so that the receive clock would be derived from the transmit clock which addressed the observed issue with an external GPHY disabling it's Rx clock. However, when a Tx clock is not available this loopback is insufficient. This commit implements a workaround that leverages the fact that the MAC can reliably generate all of its necessary clocking by enterring the external GPHY RGMII interface mode with the UniMAC in local loopback during the sw_reset interval. Unfortunately, this has the undesirable side efect of the RGMII GTXCLK signal being driven during the same window. In most configurations this is a benign side effect as the signal is either not routed to a pin or is already expected to drive the pin. The one exception is when an external MII PHY is expected to drive the same pin with its TX_CLK output creating output driver contention. This commit exploits the IEEE 802.3 clause 22 standard defined isolate mode to force an external MII PHY to present a high impedance on its TX_CLK output during the window to prevent any contention at the pin. The MII interface is used internally with the 40nm internal EPHY which agressively disables its clocks for power savings leading to incomplete resets of the UniMAC and many instabilities observed over the years. The workaround of this commit is expected to put an end to those problems. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 -- drivers/net/ethernet/broadcom/genet/bcmmii.c | 33 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0f138280315a..a1776ed8d7a1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1996,8 +1996,6 @@ static void reset_umac(struct bcmgenet_priv *priv) /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */ bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD); - udelay(2); - bcmgenet_umac_writel(priv, 0, UMAC_CMD); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 17bb8d60a157..fcd181ae3a7d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -221,8 +221,38 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) const char *phy_name = NULL; u32 id_mode_dis = 0; u32 port_ctrl; + int bmcr = -1; + int ret; u32 reg; + /* MAC clocking workaround during reset of umac state machines */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + if (reg & CMD_SW_RESET) { + /* An MII PHY must be isolated to prevent TXC contention */ + if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { + ret = phy_read(phydev, MII_BMCR); + if (ret >= 0) { + bmcr = ret; + ret = phy_write(phydev, MII_BMCR, + bmcr | BMCR_ISOLATE); + } + if (ret) { + netdev_err(dev, "failed to isolate PHY\n"); + return ret; + } + } + /* Switch MAC clocking to RGMII generated clock */ + bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL); + /* Ensure 5 clks with Rx disabled + * followed by 5 clks with Reset asserted + */ + udelay(4); + reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN); + bcmgenet_umac_writel(priv, reg, UMAC_CMD); + /* Ensure 5 more clocks before Rx is enabled */ + udelay(2); + } + priv->ext_phy = !priv->internal_phy && (priv->phy_interface != PHY_INTERFACE_MODE_MOCA); @@ -254,6 +284,9 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) phy_set_max_speed(phydev, SPEED_100); bcmgenet_sys_writel(priv, PORT_MODE_EXT_EPHY, SYS_PORT_CTRL); + /* Restore the MII PHY after isolation */ + if (bmcr >= 0) + phy_write(phydev, MII_BMCR, bmcr); break; case PHY_INTERFACE_MODE_REVMII: -- cgit From 6b6d017fccb4693767d2fcae9ef2fd05243748bb Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:25 -0800 Subject: Revert "net: bcmgenet: soft reset 40nm EPHYs before MAC init" This reverts commit 1f515486275a08a17a2c806b844cca18f7de5b34. This commit improved the chances of the umac resetting cleanly by ensuring that the PHY was restored to its normal operation prior to resetting the umac. However, there were still cases when the PHY might not be driving a Tx clock to the umac during this window (e.g. when the PHY detects no link). The previous commit now ensures that the unimac receives clocks from the MAC during its reset window so this commit is no longer needed. This commit also has an unintended negative impact on the MDIO performance of the UniMAC MDIO interface because it is used before the MDIO interrupts are reenabled, so it should be removed. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 28 +++---- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 112 ++++++++++++++----------- 3 files changed, 73 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a1776ed8d7a1..b5255dd08265 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2877,12 +2877,6 @@ static int bcmgenet_open(struct net_device *dev) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - ret = bcmgenet_mii_connect(dev); - if (ret) { - netdev_err(dev, "failed to connect to PHY\n"); - goto err_clk_disable; - } - /* take MAC out of reset */ bcmgenet_umac_reset(priv); @@ -2892,12 +2886,6 @@ static int bcmgenet_open(struct net_device *dev) reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); - ret = bcmgenet_mii_config(dev, true); - if (ret) { - netdev_err(dev, "unsupported PHY\n"); - goto err_disconnect_phy; - } - bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { @@ -2913,7 +2901,7 @@ static int bcmgenet_open(struct net_device *dev) ret = bcmgenet_init_dma(priv); if (ret) { netdev_err(dev, "failed to initialize DMA\n"); - goto err_disconnect_phy; + goto err_clk_disable; } /* Always enable ring 16 - descriptor ring */ @@ -2936,19 +2924,25 @@ static int bcmgenet_open(struct net_device *dev) goto err_irq0; } + ret = bcmgenet_mii_probe(dev); + if (ret) { + netdev_err(dev, "failed to connect to PHY\n"); + goto err_irq1; + } + bcmgenet_netif_start(dev); netif_tx_start_all_queues(dev); return 0; +err_irq1: + free_irq(priv->irq1, priv); err_irq0: free_irq(priv->irq0, priv); err_fini_dma: bcmgenet_dma_teardown(priv); bcmgenet_fini_dma(priv); -err_disconnect_phy: - phy_disconnect(dev->phydev); err_clk_disable: if (priv->internal_phy) bcmgenet_power_down(priv, GENET_POWER_PASSIVE); @@ -3629,8 +3623,6 @@ static int bcmgenet_resume(struct device *d) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - phy_init_hw(dev->phydev); - bcmgenet_umac_reset(priv); init_umac(priv); @@ -3639,6 +3631,8 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); + phy_init_hw(dev->phydev); + /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev, false); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7fbf573d8d52..dbc69d8fa05f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -720,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF); /* MDIO routines */ int bcmgenet_mii_init(struct net_device *dev); -int bcmgenet_mii_connect(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev, bool init); +int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index fcd181ae3a7d..dbe18cdf6c1b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -173,46 +173,6 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_fixed_phy_link_update); } -int bcmgenet_mii_connect(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - struct device_node *dn = priv->pdev->dev.of_node; - struct phy_device *phydev; - u32 phy_flags = 0; - int ret; - - /* Communicate the integrated PHY revision */ - if (priv->internal_phy) - phy_flags = priv->gphy_rev; - - /* Initialize link state variables that bcmgenet_mii_setup() uses */ - priv->old_link = -1; - priv->old_speed = -1; - priv->old_duplex = -1; - priv->old_pause = -1; - - if (dn) { - phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, - phy_flags, priv->phy_interface); - if (!phydev) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } else { - phydev = dev->phydev; - phydev->dev_flags = phy_flags; - - ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, - priv->phy_interface); - if (ret) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } - - return 0; -} - int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -339,21 +299,71 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } - if (init) { - linkmode_copy(phydev->advertising, phydev->supported); + if (init) + dev_info(kdev, "configuring instance for %s\n", phy_name); - /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs. On GENETv5 there is a hardware issue - * that prevents the signaling of link UP interrupts when - * the link operates at 10Mbps, so fallback to polling for - * those versions of GENET. - */ - if (priv->internal_phy && !GENET_IS_V5(priv)) - phydev->irq = PHY_IGNORE_INTERRUPT; + return 0; +} - dev_info(kdev, "configuring instance for %s\n", phy_name); +int bcmgenet_mii_probe(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + struct device_node *dn = priv->pdev->dev.of_node; + struct phy_device *phydev; + u32 phy_flags = 0; + int ret; + + /* Communicate the integrated PHY revision */ + if (priv->internal_phy) + phy_flags = priv->gphy_rev; + + /* Initialize link state variables that bcmgenet_mii_setup() uses */ + priv->old_link = -1; + priv->old_speed = -1; + priv->old_duplex = -1; + priv->old_pause = -1; + + if (dn) { + phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, + phy_flags, priv->phy_interface); + if (!phydev) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } else { + phydev = dev->phydev; + phydev->dev_flags = phy_flags; + + ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, + priv->phy_interface); + if (ret) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } + + /* Configure port multiplexer based on what the probed PHY device since + * reading the 'max-speed' property determines the maximum supported + * PHY speed which is needed for bcmgenet_mii_config() to configure + * things appropriately. + */ + ret = bcmgenet_mii_config(dev, true); + if (ret) { + phy_disconnect(dev->phydev); + return ret; } + linkmode_copy(phydev->advertising, phydev->supported); + + /* The internal PHY has its link interrupts routed to the + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. + */ + if (priv->internal_phy && !GENET_IS_V5(priv)) + dev->phydev->irq = PHY_IGNORE_INTERRUPT; + return 0; } -- cgit From 0686bd9d5e6863f60e4bb1e78e6fe7bb217a0890 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 5 Nov 2019 11:07:26 -0800 Subject: net: bcmgenet: reapply manual settings to the PHY The phy_init_hw() function may reset the PHY to a configuration that does not match manual network settings stored in the phydev structure. If the phy state machine is polled rather than event driven this can create a timing hazard where the phy state machine might alter the settings stored in the phydev structure from the value read from the BMCR. This commit follows invocations of phy_init_hw() by the bcmgenet driver with invocations of the genphy_config_aneg() function to ensure that the BMCR is written to match the settings held in the phydev structure. This prevents the risk of manual settings being accidentally altered. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b5255dd08265..1de51811fcb4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2612,8 +2612,10 @@ static void bcmgenet_irq_task(struct work_struct *work) spin_unlock_irq(&priv->lock); if (status & UMAC_IRQ_PHY_DET_R && - priv->dev->phydev->autoneg != AUTONEG_ENABLE) + priv->dev->phydev->autoneg != AUTONEG_ENABLE) { phy_init_hw(priv->dev->phydev); + genphy_config_aneg(priv->dev->phydev); + } /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) @@ -3634,6 +3636,7 @@ static int bcmgenet_resume(struct device *d) phy_init_hw(dev->phydev); /* Speed settings must be restored */ + genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); bcmgenet_set_hw_addr(priv, dev->dev_addr); -- cgit From f382b0df6946d48fae80a2201ccff43b41382099 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 28 Oct 2019 19:13:58 +0200 Subject: net/mlx5e: Fix eswitch debug print of max fdb flow The value is already the calculation so remove the log prefix. Fixes: e52c28024008 ("net/mlx5: E-Switch, Add chains and priorities") Signed-off-by: Roi Dayan Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 369499e88fe8..9004a07e457a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1079,7 +1079,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) MLX5_CAP_GEN(dev, max_flow_counter_15_0); fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); -- cgit From 22f83150f03e119f61f4d00d6884639871ba6859 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 29 Oct 2019 13:28:17 +0200 Subject: net/mlx5: DR, Fix memory leak in modify action destroy The rewrite data was no freed. Fixes: 9db810ed2d37 ("net/mlx5: DR, Expose steering action functionality") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index b74b7d0f6590..004c56c2fc0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1577,6 +1577,7 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) break; case DR_ACTION_TYP_MODIFY_HDR: mlx5dr_icm_free_chunk(action->rewrite.chunk); + kfree(action->rewrite.data); refcount_dec(&action->rewrite.dmn->refcount); break; default: -- cgit From 260986fcff81a0c147a5d169f1ad52aab95804d5 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 29 Oct 2019 18:18:10 +0200 Subject: net/mlx5: DR, Fix memory leak during rule creation During rule creation hw_ste_arr was not freed. Fixes: 41d07074154c ("net/mlx5: DR, Expose steering rule functionality") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index e8b656075c6f..5dcb8baf491a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -1096,6 +1096,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, if (htbl) mlx5dr_htbl_put(htbl); + kfree(hw_ste_arr); + return 0; free_ste: -- cgit From 950d3af70ea89cf7ac51d734a634174013631192 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Tue, 29 Oct 2019 11:44:24 +0200 Subject: net/mlx5e: Use correct enum to determine uplink port For vlan push action, if eswitch flow source capability is enabled, flow source value compared with MLX5_VPORT_UPLINK enum, to determine uplink port. This lead to syndrome in dmesg if try to add vlan push action. For example: $ tc filter add dev vxlan0 ingress protocol ip prio 1 flower \ enc_dst_port 4789 \ action tunnel_key unset pipe \ action vlan push id 20 pipe \ action mirred egress redirect dev ens1f0_0 $ dmesg ... [ 2456.883693] mlx5_core 0000:82:00.0: mlx5_cmd_check:756:(pid 5273): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xa9c090) Use the correct enum value MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK. Fixes: bb204dcf39fe ("net/mlx5e: Determine source port properly for vlan push action") Signed-off-by: Dmytro Linkin Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 7879e1746297..366bda1bb1c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -183,7 +183,8 @@ static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw, u32 port_mask, port_value; if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) - return spec->flow_context.flow_source == MLX5_VPORT_UPLINK; + return spec->flow_context.flow_source == + MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; port_mask = MLX5_GET(fte_match_param, spec->match_criteria, misc_parameters.source_port); -- cgit From 7afb3e575e5aa9f5a200a3eb3f45d8130f6d6601 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 5 Nov 2019 23:50:13 +0200 Subject: net: mscc: ocelot: don't handle netdev events for other netdevs The check that the event is actually for this device should be moved from the "port" handler to the net device handler. Otherwise the port handler will deny bonding configuration for other net devices in the same system (like enetc in the LS1028A) that don't have the lag_upper_info->tx_type restriction that ocelot has. Fixes: dc96ee3730fc ("net: mscc: ocelot: add bonding support") Signed-off-by: Claudiu Manoil Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 344539c0d3aa..dbf09fcf61f1 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1680,9 +1680,6 @@ static int ocelot_netdevice_port_event(struct net_device *dev, struct ocelot_port *ocelot_port = netdev_priv(dev); int err = 0; - if (!ocelot_netdevice_dev_check(dev)) - return 0; - switch (event) { case NETDEV_CHANGEUPPER: if (netif_is_bridge_master(info->upper_dev)) { @@ -1719,6 +1716,9 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ret = 0; + if (!ocelot_netdevice_dev_check(dev)) + return 0; + if (event == NETDEV_PRECHANGEUPPER && netif_is_lag_master(info->upper_dev)) { struct netdev_lag_upper_info *lag_upper_info = info->upper_info; -- cgit From 3b3eed8eec47259939ee6c3d58aea1c311ddee3b Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 5 Nov 2019 23:50:14 +0200 Subject: net: mscc: ocelot: fix NULL pointer on LAG slave removal lag_upper_info may be NULL on slave removal. Fixes: dc96ee3730fc ("net: mscc: ocelot: add bonding support") Signed-off-by: Claudiu Manoil Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index dbf09fcf61f1..672ea1342add 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1724,7 +1724,8 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct netdev_lag_upper_info *lag_upper_info = info->upper_info; struct netlink_ext_ack *extack; - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + if (lag_upper_info && + lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { extack = netdev_notifier_info_to_extack(&info->info); NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type"); -- cgit From 17fdd7638cb687cd7f15a48545f25d738f0101e0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 6 Nov 2019 00:01:40 +0200 Subject: net: mscc: ocelot: fix __ocelot_rmw_ix prototype The "read-modify-write register index" function is declared with a confusing prototype: the "mask" and "reg" arguments are swapped. Fortunately, this does not affect callers so far. Both arguments are u32, and the wrapper macros (ocelot_rmw_ix etc) have the arguments in the correct order (the one from ocelot_io.c). Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index e40773c01a44..06ac806052bc 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -523,7 +523,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset); #define ocelot_write_rix(ocelot, val, reg, ri) __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri)) #define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0) -void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 mask, +void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, u32 offset); #define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri)) #define ocelot_rmw_gix(ocelot, val, m, reg, gi) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi)) -- cgit From 02b1fa07bb58f5d1f349b5b09eb936739a7b20fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 14:24:34 -0800 Subject: net/tls: don't pay attention to sk_write_pending when pushing partial records sk_write_pending being not zero does not guarantee that partial record will be pushed. If the thread waiting for memory times out the pending record may get stuck. In case of tls_device there is no path where parial record is set and writer present in the first place. Partial record is set only in tls_push_sg() and tls_push_sg() will return an error immediately. All tls_device callers of tls_push_sg() will return (and not wait for memory) if it failed. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_device.c | 4 +++- net/tls/tls_sw.c | 9 +++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f959487c5cd1..5a3715ddc592 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -623,9 +623,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags) void tls_device_write_space(struct sock *sk, struct tls_context *ctx) { - if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) { + if (tls_is_partially_sent_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; + WARN_ON_ONCE(sk->sk_write_pending); + sk->sk_allocation = GFP_ATOMIC; tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL | diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c2b5e0d2ba1a..e155b792df0b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2180,12 +2180,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx); /* Schedule the transmission if tx list is ready */ - if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) { - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, - &tx_ctx->tx_bitmask)) - schedule_delayed_work(&tx_ctx->tx_work.work, 0); - } + if (is_tx_ready(tx_ctx) && + !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask)) + schedule_delayed_work(&tx_ctx->tx_work.work, 0); } void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) -- cgit From 79ffe6087e9145d2377385cac48d0d6a6b4225a5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 14:24:35 -0800 Subject: net/tls: add a TX lock TLS TX needs to release and re-acquire the socket lock if send buffer fills up. TLS SW TX path currently depends on only allowing one thread to enter the function by the abuse of sk_write_pending. If another writer is already waiting for memory no new ones are allowed in. This has two problems: - writers don't wake other threads up when they leave the kernel; meaning that this scheme works for single extra thread (second application thread or delayed work) because memory becoming available will send a wake up request, but as Mallesham and Pooja report with larger number of threads it leads to threads being put to sleep indefinitely; - the delayed work does not get _scheduled_ but it may _run_ when other writers are present leading to crashes as writers don't expect state to change under their feet (same records get pushed and freed multiple times); it's hard to reliably bail from the work, however, because the mere presence of a writer does not guarantee that the writer will push pending records before exiting. Ensuring wakeups always happen will make the code basically open code a mutex. Just use a mutex. The TLS HW TX path does not have any locking (not even the sk_write_pending hack), yet it uses a per-socket sg_tx_data array to push records. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Mallesham Jatharakonda Reported-by: Pooja Trivedi Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/tls.h | 5 +++++ net/tls/tls_device.c | 6 ++++++ net/tls/tls_main.c | 2 ++ net/tls/tls_sw.c | 21 +++++++-------------- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index c664e6dba0d1..794e297483ea 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -269,6 +270,10 @@ struct tls_context { bool in_tcp_sendpages; bool pending_open_record_frags; + + struct mutex tx_lock; /* protects partially_sent_* fields and + * per-type TX fields + */ unsigned long flags; /* cache cold stuff */ diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 5a3715ddc592..683d00837693 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -523,8 +523,10 @@ last_record: int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; + struct tls_context *tls_ctx = tls_get_ctx(sk); int rc; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); if (unlikely(msg->msg_controllen)) { @@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) out: release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return rc; } int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { + struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter msg_iter; char *kaddr = kmap(page); struct kvec iov; @@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, if (flags & MSG_SENDPAGE_NOTLAST) flags |= MSG_MORE; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); if (flags & MSG_OOB) { @@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, out: release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return rc; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ac88877dcade..0775ae40fcfb 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx) memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv)); + mutex_destroy(&ctx->tx_lock); if (sk) kfree_rcu(ctx, rcu); @@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk) if (!ctx) return NULL; + mutex_init(&ctx->tx_lock); rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = sk->sk_prot; return ctx; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e155b792df0b..446f23c1f3ce 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) return -ENOTSUPP; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); - /* Wait till there is any pending write on socket */ - if (unlikely(sk->sk_write_pending)) { - ret = wait_on_pending_writer(sk, &timeo); - if (unlikely(ret)) - goto send_end; - } - if (unlikely(msg->msg_controllen)) { ret = tls_proccess_cmsg(sk, msg, &record_type); if (ret) { @@ -1091,6 +1085,7 @@ send_end: ret = sk_stream_error(sk, msg->msg_flags, ret); release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return copied ? copied : ret; } @@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - /* Wait till there is any pending write on socket */ - if (unlikely(sk->sk_write_pending)) { - ret = wait_on_pending_writer(sk, &timeo); - if (unlikely(ret)) - goto sendpage_end; - } - /* Call the sk_stream functions to manage the sndbuf mem. */ while (size > 0) { size_t copy, required_size; @@ -1219,15 +1207,18 @@ sendpage_end: int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { + struct tls_context *tls_ctx = tls_get_ctx(sk); int ret; if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) return -ENOTSUPP; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); ret = tls_sw_do_sendpage(sk, page, offset, size, flags); release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); return ret; } @@ -2170,9 +2161,11 @@ static void tx_work_handler(struct work_struct *work) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); tls_tx_records(sk, -1); release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); } void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) -- cgit From 41098af59d8d753aa8d3bb4310cc4ecb61fc82c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2019 14:24:36 -0800 Subject: selftests/tls: add test for concurrent recv and send Add a test which spawns 16 threads and performs concurrent send and recv calls on the same socket. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 108 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4c285b6e1db8..1c8f194d6556 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -898,6 +898,114 @@ TEST_F(tls, nonblocking) } } +static void +test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self, + bool sendpg, unsigned int n_readers, unsigned int n_writers) +{ + const unsigned int n_children = n_readers + n_writers; + const size_t data = 6 * 1000 * 1000; + const size_t file_sz = data / 100; + size_t read_bias, write_bias; + int i, fd, child_id; + char buf[file_sz]; + pid_t pid; + + /* Only allow multiples for simplicity */ + ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true); + read_bias = n_writers / n_readers ?: 1; + write_bias = n_readers / n_writers ?: 1; + + /* prep a file to send */ + fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600); + ASSERT_GE(fd, 0); + + memset(buf, 0xac, file_sz); + ASSERT_EQ(write(fd, buf, file_sz), file_sz); + + /* spawn children */ + for (child_id = 0; child_id < n_children; child_id++) { + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) + break; + } + + /* parent waits for all children */ + if (pid) { + for (i = 0; i < n_children; i++) { + int status; + + wait(&status); + EXPECT_EQ(status, 0); + } + + return; + } + + /* Split threads for reading and writing */ + if (child_id < n_readers) { + size_t left = data * read_bias; + char rb[8001]; + + while (left) { + int res; + + res = recv(self->cfd, rb, + left > sizeof(rb) ? sizeof(rb) : left, 0); + + EXPECT_GE(res, 0); + left -= res; + } + } else { + size_t left = data * write_bias; + + while (left) { + int res; + + ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0); + if (sendpg) + res = sendfile(self->fd, fd, NULL, + left > file_sz ? file_sz : left); + else + res = send(self->fd, buf, + left > file_sz ? file_sz : left, 0); + + EXPECT_GE(res, 0); + left -= res; + } + } +} + +TEST_F(tls, mutliproc_even) +{ + test_mutliproc(_metadata, self, false, 6, 6); +} + +TEST_F(tls, mutliproc_readers) +{ + test_mutliproc(_metadata, self, false, 4, 12); +} + +TEST_F(tls, mutliproc_writers) +{ + test_mutliproc(_metadata, self, false, 10, 2); +} + +TEST_F(tls, mutliproc_sendpage_even) +{ + test_mutliproc(_metadata, self, true, 6, 6); +} + +TEST_F(tls, mutliproc_sendpage_readers) +{ + test_mutliproc(_metadata, self, true, 4, 12); +} + +TEST_F(tls, mutliproc_sendpage_writers) +{ + test_mutliproc(_metadata, self, true, 10, 2); +} + TEST_F(tls, control_msg) { if (self->notls) -- cgit From 98f3375505b8d6517bd6710bc6d4f6289eeb30aa Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 6 Nov 2019 10:49:57 +0100 Subject: net/smc: fix ethernet interface refcounting If a pnet table entry is to be added mentioning a valid ethernet interface, but an invalid infiniband or ISM device, the dev_put() operation for the ethernet interface is called twice, resulting in a negative refcount for the ethernet interface, which disables removal of such a network interface. This patch removes one of the dev_put() calls. Fixes: 890a2cb4a966 ("net/smc: rework pnet table") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 2920b006f65c..571e6d84da3b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -376,8 +376,6 @@ static int smc_pnet_fill_entry(struct net *net, return 0; error: - if (pnetelem->ndev) - dev_put(pnetelem->ndev); return rc; } -- cgit From 4d7c47e34fab0d25790bb6e85b85e26fdf0090d5 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:55 +0100 Subject: net: stmmac: gmac4: bitrev32 returns u32 The bitrev32 function returns an u32 var, not an int. Fix it. Fixes: 477286b53f55 ("stmmac: add GMAC4 core support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 5a7b0aca1d31..66e60c7e9850 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -432,7 +432,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, * bits used depends on the hardware configuration * selected at core configuration time. */ - int bit_nr = bitrev32(~crc32_le(~0, ha->addr, + u32 bit_nr = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)) >> (32 - mcbitslog2); /* The most significant bit determines the register to * use (H/L) while the other 5 bits determine the bit -- cgit From 3d00e45d498fd5347cea653ef494c56731b651e0 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:56 +0100 Subject: net: stmmac: xgmac: bitrev32 returns u32 The bitrev32 function returns an u32 var, not an int. Fix it. Fixes: 0efedbf11f07 ("net: stmmac: xgmac: Fix XGMAC selftests") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 5031398e612c..01075a955c66 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -463,7 +463,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, value |= XGMAC_FILTER_HMC; netdev_for_each_mc_addr(ha, dev) { - int nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >> + u32 nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >> (32 - mcbitslog2)); mc_filter[nr >> 5] |= (1 << (nr & 0x1F)); } -- cgit From eeb9d745169847ecde4aa311eb618beb984629b0 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:57 +0100 Subject: net: stmmac: selftests: Prevent false positives in filter tests In L2 tests that filter packets by destination MAC address we need to prevent false positives that can occur if we add an address that collides with the existing ones. To fix this, lets manually check if the new address to be added is already present in the NIC and use a different one if so. For Hash filtering this also envolves converting the address to the hash. Fixes: 091810dbded9 ("net: stmmac: Introduce selftests support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 134 +++++++++++++++------ 1 file changed, 94 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index e4ac3c401432..ac3f658105c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -6,7 +6,9 @@ * Author: Jose Abreu */ +#include #include +#include #include #include #include @@ -485,12 +487,48 @@ static int stmmac_filter_check(struct stmmac_priv *priv) return -EOPNOTSUPP; } +static bool stmmac_hash_check(struct stmmac_priv *priv, unsigned char *addr) +{ + int mc_offset = 32 - priv->hw->mcast_bits_log2; + struct netdev_hw_addr *ha; + u32 hash, hash_nr; + + /* First compute the hash for desired addr */ + hash = bitrev32(~crc32_le(~0, addr, 6)) >> mc_offset; + hash_nr = hash >> 5; + hash = 1 << (hash & 0x1f); + + /* Now, check if it collides with any existing one */ + netdev_for_each_mc_addr(ha, priv->dev) { + u32 nr = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)) >> mc_offset; + if (((nr >> 5) == hash_nr) && ((1 << (nr & 0x1f)) == hash)) + return false; + } + + /* No collisions, address is good to go */ + return true; +} + +static bool stmmac_perfect_check(struct stmmac_priv *priv, unsigned char *addr) +{ + struct netdev_hw_addr *ha; + + /* Check if it collides with any existing one */ + netdev_for_each_uc_addr(ha, priv->dev) { + if (!memcmp(ha->addr, addr, ETH_ALEN)) + return false; + } + + /* No collisions, address is good to go */ + return true; +} + static int stmmac_test_hfilt(struct stmmac_priv *priv) { - unsigned char gd_addr[ETH_ALEN] = {0x01, 0xee, 0xdd, 0xcc, 0xbb, 0xaa}; - unsigned char bd_addr[ETH_ALEN] = {0x01, 0x01, 0x02, 0x03, 0x04, 0x05}; + unsigned char gd_addr[ETH_ALEN] = {0xf1, 0xee, 0xdd, 0xcc, 0xbb, 0xaa}; + unsigned char bd_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; ret = stmmac_filter_check(priv); if (ret) @@ -499,6 +537,16 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv) if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins) return -EOPNOTSUPP; + while (--tries) { + /* We only need to check the bd_addr for collisions */ + bd_addr[ETH_ALEN - 1] = tries; + if (stmmac_hash_check(priv, bd_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; + ret = dev_mc_add(priv->dev, gd_addr); if (ret) return ret; @@ -523,13 +571,25 @@ cleanup: static int stmmac_test_pfilt(struct stmmac_priv *priv) { - unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; - unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55}; + unsigned char gd_addr[ETH_ALEN] = {0xf0, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char bd_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; if (stmmac_filter_check(priv)) return -EOPNOTSUPP; + if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries) + return -EOPNOTSUPP; + + while (--tries) { + /* We only need to check the bd_addr for collisions */ + bd_addr[ETH_ALEN - 1] = tries; + if (stmmac_perfect_check(priv, bd_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; ret = dev_uc_add(priv->dev, gd_addr); if (ret) @@ -553,39 +613,31 @@ cleanup: return ret; } -static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr) -{ - return 0; -} - -static void stmmac_test_set_rx_mode(struct net_device *netdev) -{ - /* As we are in test mode of ethtool we already own the rtnl lock - * so no address will change from user. We can just call the - * ndo_set_rx_mode() callback directly */ - if (netdev->netdev_ops->ndo_set_rx_mode) - netdev->netdev_ops->ndo_set_rx_mode(netdev); -} - static int stmmac_test_mcfilt(struct stmmac_priv *priv) { - unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; - unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff}; + unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; if (stmmac_filter_check(priv)) return -EOPNOTSUPP; - if (!priv->hw->multicast_filter_bins) + if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries) return -EOPNOTSUPP; - /* Remove all MC addresses */ - __dev_mc_unsync(priv->dev, NULL); - stmmac_test_set_rx_mode(priv->dev); + while (--tries) { + /* We only need to check the mc_addr for collisions */ + mc_addr[ETH_ALEN - 1] = tries; + if (stmmac_hash_check(priv, mc_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; ret = dev_uc_add(priv->dev, uc_addr); if (ret) - goto cleanup; + return ret; attr.dst = uc_addr; @@ -602,30 +654,34 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv) cleanup: dev_uc_del(priv->dev, uc_addr); - __dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL); - stmmac_test_set_rx_mode(priv->dev); return ret; } static int stmmac_test_ucfilt(struct stmmac_priv *priv) { - unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; - unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff}; + unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff}; struct stmmac_packet_attrs attr = { }; - int ret; + int ret, tries = 256; if (stmmac_filter_check(priv)) return -EOPNOTSUPP; - if (!priv->hw->multicast_filter_bins) + if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins) return -EOPNOTSUPP; - /* Remove all UC addresses */ - __dev_uc_unsync(priv->dev, NULL); - stmmac_test_set_rx_mode(priv->dev); + while (--tries) { + /* We only need to check the uc_addr for collisions */ + uc_addr[ETH_ALEN - 1] = tries; + if (stmmac_perfect_check(priv, uc_addr)) + break; + } + + if (!tries) + return -EOPNOTSUPP; ret = dev_mc_add(priv->dev, mc_addr); if (ret) - goto cleanup; + return ret; attr.dst = mc_addr; @@ -642,8 +698,6 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv) cleanup: dev_mc_del(priv->dev, mc_addr); - __dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL); - stmmac_test_set_rx_mode(priv->dev); return ret; } -- cgit From 96147375d49f9b523e6462992ddcfb753835c42f Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:58 +0100 Subject: net: stmmac: xgmac: Only get SPH header len if available Split Header length is only available when L34T == 0. Fix this by correctly checking if L34T is zero before trying to get Header length. Fixes: 67afd6d1cfdf ("net: stmmac: Add Split Header support and enable it in XGMAC cores") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index ae48154f933c..bd5838ce1e8a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -288,7 +288,8 @@ static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash, static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len) { - *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; + if (le32_to_cpu(p->des3) & XGMAC_RDES3_L34T) + *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; return 0; } -- cgit From 97add93fbcfa566735d6a4b96684110d356ebd35 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:02:59 +0100 Subject: net: stmmac: xgmac: Fix TSA selection When we change between Transmission Scheduling Algorithms, we need to clear previous values so that the new chosen algorithm is correctly selected. Fixes: ec6ea8e3eee9 ("net: stmmac: Add CBS support in XGMAC2") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 01075a955c66..070bd7d1ae4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -224,6 +224,7 @@ static void dwxgmac2_config_cbs(struct mac_device_info *hw, writel(low_credit, ioaddr + XGMAC_MTL_TCx_LOCREDIT(queue)); value = readl(ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue)); + value &= ~XGMAC_TSA; value |= XGMAC_CC | XGMAC_CBS; writel(value, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue)); } -- cgit From 08c1ac3bcba8cd52449f55a065e60779a0fa2c97 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:00 +0100 Subject: net: stmmac: xgmac: Fix AV Feature detection Fix incorrect precedence of operators. For reference: AV implies AV Feature but RAV implies only RX side AV Feature. As we want full AV features we need to check RAV. Fixes: c2b69474d63b ("net: stmmac: xgmac: Correct RAVSEL field interpretation") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 965cbe3e6f51..2e814aa64a5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -369,7 +369,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13; dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12; dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11; - dma_cap->av &= !(hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10; + dma_cap->av &= !((hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10); dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9; dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8; dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7; -- cgit From 132f2f20c9866325d12c155aca06d260f358d3cb Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:01 +0100 Subject: net: stmmac: xgmac: Disable Flow Control when 1 or more queues are in AV When in AVB mode we need to disable flow control to prevent MAC from pausing in TX side. Fixes: ec6ea8e3eee9 ("net: stmmac: Add CBS support in XGMAC2") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 2e814aa64a5c..f70ca5300b82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -470,6 +470,7 @@ static void dwxgmac2_enable_tso(void __iomem *ioaddr, bool en, u32 chan) static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode) { u32 value = readl(ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); + u32 flow = readl(ioaddr + XGMAC_RX_FLOW_CTRL); value &= ~XGMAC_TXQEN; if (qmode != MTL_QUEUE_AVB) { @@ -477,6 +478,7 @@ static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode) writel(0, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(channel)); } else { value |= 0x1 << XGMAC_TXQEN_SHIFT; + writel(flow & (~XGMAC_RFE), ioaddr + XGMAC_RX_FLOW_CTRL); } writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); -- cgit From aeb18dd0769289ae9141221862319a36094fb06b Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:02 +0100 Subject: net: stmmac: xgmac: Disable MMC interrupts by default MMC interrupts were being enabled, which is not what we want because it will lead to a storm of interrupts that are not handled at all. Fix it by disabling all MMC interrupts for XGMAC. Fixes: b6cdf09f51c2 ("net: stmmac: xgmac: Implement MMC counters") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/mmc_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index a223584f5f9a..252cf48c5816 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -176,6 +176,7 @@ #define MMC_XGMAC_RX_PKT_SMD_ERR 0x22c #define MMC_XGMAC_RX_PKT_ASSEMBLY_OK 0x230 #define MMC_XGMAC_RX_FPE_FRAG 0x234 +#define MMC_XGMAC_RX_IPC_INTR_MASK 0x25c static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) { @@ -333,8 +334,9 @@ static void dwxgmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr) { - writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK); - writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK); + writel(0x0, mmcaddr + MMC_RX_INTR_MASK); + writel(0x0, mmcaddr + MMC_TX_INTR_MASK); + writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK); } static void dwxgmac_read_mmc_reg(void __iomem *addr, u32 reg, u32 *dest) -- cgit From cda4985a3ebb95f14fe3a9dc905820417baf79a5 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:03 +0100 Subject: net: stmmac: Fix the packet count in stmmac_rx() Currently, stmmac_rx() is counting the number of descriptors but it should count the number of packets as specified by the NAPI limit. Fix this. Fixes: ec222003bd94 ("net: stmmac: Prepare to add Split Header support") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4e9c848c67cc..1ab1eea1556a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3506,8 +3506,6 @@ read_again: if (unlikely(status & dma_own)) break; - count++; - rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE); next_entry = rx_q->cur_rx; @@ -3534,6 +3532,7 @@ read_again: goto read_again; if (unlikely(error)) { dev_kfree_skb(skb); + count++; continue; } @@ -3573,6 +3572,7 @@ read_again: skb = napi_alloc_skb(&ch->rx_napi, len); if (!skb) { priv->dev->stats.rx_dropped++; + count++; continue; } @@ -3638,6 +3638,7 @@ read_again: priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += len; + count++; } if (status & rx_not_ls) { -- cgit From b2f071995b7a4f175467d683d4d18eab10810020 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:04 +0100 Subject: net: stmmac: Fix TSO descriptor with Enhanced Addressing When using addressing > 32 bits the TSO first descriptor only has the header so we can't set the payload field for this descriptor. Let's reset the variable so that buffer 2 value is zero. Fixes: a993db88d17d ("net: stmmac: Enable support for > 32 Bits addressing in XGMAC") Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1ab1eea1556a..b0a16d7c6e3d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2996,6 +2996,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_desc_addr(priv, first, des); tmp_pay_len = pay_len; des += proto_hdr_len; + pay_len = 0; } stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); -- cgit From 7df4a3a76d34fe1f550e82c6ef368c7c40f0aaa4 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 6 Nov 2019 16:03:05 +0100 Subject: net: stmmac: Fix the TX IOC in xmit path IOC bit must be only set in the last descriptor. Move the logic up a little bit to make sure it's set in the correct descriptor. Signed-off-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 64 ++++++++++++----------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b0a16d7c6e3d..f826365c979d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3024,6 +3024,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[tx_q->cur_tx] = skb; + /* Manage tx mitigation */ + tx_q->tx_count_frames += nfrags + 1; + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + desc = &tx_q->dma_tx[tx_q->cur_tx]; + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, desc); + priv->xstats.tx_set_ic_bit++; + } + /* We've used all descriptors we need for this skb, however, * advance cur_tx so that it references a fresh descriptor. * ndo_start_xmit will fill this descriptor the next time it's @@ -3041,19 +3054,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) priv->xstats.tx_tso_frames++; priv->xstats.tx_tso_nfrags += nfrags; - /* Manage tx mitigation */ - tx_q->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && - !(priv->synopsys_id >= DWMAC_CORE_4_00 && - (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - stmmac_tx_timer_arm(priv, queue); - } else { - tx_q->tx_count_frames = 0; - stmmac_set_tx_ic(priv, desc); - priv->xstats.tx_set_ic_bit++; - } - if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -3225,6 +3225,27 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[entry] = skb; + /* According to the coalesce parameter the IC bit for the latest + * segment is reset and the timer re-started to clean the tx status. + * This approach takes care about the fragments: desc is the first + * element in case of no SG. + */ + tx_q->tx_count_frames += nfrags + 1; + if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && + !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en)) { + stmmac_tx_timer_arm(priv, queue); + } else { + if (likely(priv->extend_desc)) + desc = &tx_q->dma_etx[entry].basic; + else + desc = &tx_q->dma_tx[entry]; + + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, desc); + priv->xstats.tx_set_ic_bit++; + } + /* We've used all descriptors we need for this skb, however, * advance cur_tx so that it references a fresh descriptor. * ndo_start_xmit will fill this descriptor the next time it's @@ -3260,23 +3281,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; - /* According to the coalesce parameter the IC bit for the latest - * segment is reset and the timer re-started to clean the tx status. - * This approach takes care about the fragments: desc is the first - * element in case of no SG. - */ - tx_q->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) && - !(priv->synopsys_id >= DWMAC_CORE_4_00 && - (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - priv->hwts_tx_en)) { - stmmac_tx_timer_arm(priv, queue); - } else { - tx_q->tx_count_frames = 0; - stmmac_set_tx_ic(priv, desc); - priv->xstats.tx_set_ic_bit++; - } - if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); -- cgit From 9c6850fea3edefef6e7153b2c466f09155399882 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2019 21:51:31 +0100 Subject: r8169: fix page read in r8168g_mdio_read Functions like phy_modify_paged() read the current page, on Realtek PHY's this means reading the value of register 0x1f. Add special handling for reading this register, similar to what we do already in r8168g_mdio_write(). Currently we read a random value that by chance seems to be 0 always. Fixes: a2928d28643e ("r8169: use paged versions of phylib MDIO access functions") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5064c292b873..c4e961ea44d5 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -916,6 +916,9 @@ static void r8168g_mdio_write(struct rtl8169_private *tp, int reg, int value) static int r8168g_mdio_read(struct rtl8169_private *tp, int reg) { + if (reg == 0x1f) + return tp->ocp_base == OCP_STD_PHY_BASE ? 0 : tp->ocp_base >> 4; + if (tp->ocp_base != OCP_STD_PHY_BASE) reg -= 0x10; -- cgit From 648db0514a3808eead513d7abfaf650d02a64d5c Mon Sep 17 00:00:00 2001 From: Huazhong Tan Date: Thu, 7 Nov 2019 09:30:19 +0800 Subject: net: hns3: add compatible handling for command HCLGE_OPC_PF_RST_DONE Since old firmware does not support HCLGE_OPC_PF_RST_DONE, it will return -EOPNOTSUPP to the driver when received this command. So for this case, it should just print a warning and return success to the caller. Fixes: 72e2fb07997c ("net: hns3: clear reset interrupt status in hclge_irq_handle()") Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e02e01bd9eff..16f7d0e15b4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3587,12 +3587,28 @@ static int hclge_set_rst_done(struct hclge_dev *hdev) { struct hclge_pf_rst_done_cmd *req; struct hclge_desc desc; + int ret; req = (struct hclge_pf_rst_done_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PF_RST_DONE, false); req->pf_rst_done |= HCLGE_PF_RESET_DONE_BIT; - return hclge_cmd_send(&hdev->hw, &desc, 1); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + /* To be compatible with the old firmware, which does not support + * command HCLGE_OPC_PF_RST_DONE, just print a warning and + * return success + */ + if (ret == -EOPNOTSUPP) { + dev_warn(&hdev->pdev->dev, + "current firmware does not support command(0x%x)!\n", + HCLGE_OPC_PF_RST_DONE); + return 0; + } else if (ret) { + dev_err(&hdev->pdev->dev, "assert PF reset done fail %d!\n", + ret); + } + + return ret; } static int hclge_reset_prepare_up(struct hclge_dev *hdev) -- cgit From 99a8efbb6e30b72ac98cecf81103f847abffb1e5 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 7 Nov 2019 09:33:20 +0800 Subject: NFC: st21nfca: fix double free The variable nfcid_skb is not changed in the callee nfc_hci_get_param() if error occurs. Consequently, the freed variable nfcid_skb will be freed again, resulting in a double free bug. Set nfcid_skb to NULL after releasing it to fix the bug. Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/nfc/st21nfca/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index f9ac176cf257..2ce17932a073 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -708,6 +708,7 @@ static int st21nfca_hci_complete_target_discovered(struct nfc_hci_dev *hdev, NFC_PROTO_FELICA_MASK; } else { kfree_skb(nfcid_skb); + nfcid_skb = NULL; /* P2P in type A */ r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_F_GATE, ST21NFCA_RF_READER_F_NFCID1, -- cgit From 025ec40b81d785a98f76b8bdb509ac10773b4f12 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 7 Nov 2019 14:29:50 +0800 Subject: nfc: netlink: fix double device reference drop The function nfc_put_device(dev) is called twice to drop the reference to dev when there is no associated local llcp. Remove one of them to fix the bug. Fixes: 52feb444a903 ("NFC: Extend netlink interface for LTO, RW, and MIUX parameters support") Fixes: d9b8d8e19b07 ("NFC: llcp: Service Name Lookup netlink interface") Signed-off-by: Pan Bian Reviewed-by: Johan Hovold Signed-off-by: David S. Miller --- net/nfc/netlink.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 17e6ca62f1be..afde0d763039 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1099,7 +1099,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } @@ -1159,7 +1158,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } -- cgit From 332f989a3b0041b810836c5c3747e59aad7e9d0b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 7 Nov 2019 09:48:01 +0100 Subject: CDC-NCM: handle incomplete transfer of MTU A malicious device may give half an answer when asked for its MTU. The driver will proceed after this with a garbage MTU. Anything but a complete answer must be treated as an error. V2: used sizeof as request by Alexander Reported-and-tested-by: syzbot+0631d878823ce2411636@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 00cab3f43a4c..a245597a3902 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -578,8 +578,8 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) /* read current mtu value from device */ err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); - if (err < 0) { + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); + if (err < sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } @@ -590,7 +590,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) max_datagram_size = cpu_to_le16(ctx->max_datagram_size); err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); if (err < 0) dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n"); -- cgit From e497df686e8fed8c1dd69179010656362858edb3 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Thu, 7 Nov 2019 11:57:01 +0100 Subject: net: usb: qmi_wwan: add support for DW5821e with eSIM support Exactly same layout as the default DW5821e module, just a different vid/pid. The QMI interface is exposed in USB configuration #1: P: Vendor=413c ProdID=81e0 Rev=03.18 S: Manufacturer=Dell Inc. S: Product=DW5821e-eSIM Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#=0x1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 596428ec71df..56d334b9ad45 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1362,6 +1362,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ -- cgit From bf5a6b4c474c589244dc25ee1af2c3c829228ef8 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Thu, 7 Nov 2019 17:09:53 +0000 Subject: net: hns: Fix the stray netpoll locks causing deadlock in NAPI path This patch fixes the problem of the spin locks, originally meant for the netpoll path of hns driver, causing deadlock in the normal NAPI poll path. The issue happened due to the presence of the stray leftover spin lock code related to the netpoll, whose support was earlier removed from the HNS[1], got activated due to enabling of NET_POLL_CONTROLLER switch. Earlier background: The netpoll handling code originally had this bug(as identified by Marc Zyngier[2]) of wrong spin lock API being used which did not disable the interrupts and hence could cause locking issues. i.e. if the lock were first acquired in context to thread like 'ip' util and this lock if ever got later acquired again in context to the interrupt context like TX/RX (Interrupts could always pre-empt the lock holding task and acquire the lock again) and hence could cause deadlock. Proposed Solution: 1. If the netpoll was enabled in the HNS driver, which is not right now, we could have simply used spin_[un]lock_irqsave() 2. But as netpoll is disabled, therefore, it is best to get rid of the existing locks and stray code for now. This should solve the problem reported by Marc. [1] https://git.kernel.org/torvalds/c/4bd2c03be7 [2] https://patchwork.ozlabs.org/patch/1189139/ Fixes: 4bd2c03be707 ("net: hns: remove ndo_poll_controller") Cc: lipeng Cc: Yisen Zhuang Cc: Eric Dumazet Cc: David S. Miller Reported-by: Marc Zyngier Acked-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 1 - drivers/net/ethernet/hisilicon/hns/hnae.h | 3 --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 22 +--------------------- 3 files changed, 1 insertion(+), 25 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 6d0457eb4faa..08339278c722 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -199,7 +199,6 @@ hnae_init_ring(struct hnae_queue *q, struct hnae_ring *ring, int flags) ring->q = q; ring->flags = flags; - spin_lock_init(&ring->lock); ring->coal_param = q->handle->coal_param; assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index e9c67c06bfd2..6ab9458302e1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -274,9 +274,6 @@ struct hnae_ring { /* statistic */ struct ring_stats stats; - /* ring lock for poll one */ - spinlock_t lock; - dma_addr_t desc_dma_addr; u32 buf_size; /* size for hnae_desc->addr, preset by AE */ u16 desc_num; /* total number of desc */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index a48396dd4ebb..14ab20491fd0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -943,15 +943,6 @@ static int is_valid_clean_head(struct hnae_ring *ring, int h) return u > c ? (h > c && h <= u) : (h > c || h <= u); } -/* netif_tx_lock will turn down the performance, set only when necessary */ -#ifdef CONFIG_NET_POLL_CONTROLLER -#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock) -#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock) -#else -#define NETIF_TX_LOCK(ring) -#define NETIF_TX_UNLOCK(ring) -#endif - /* reclaim all desc in one budget * return error or number of desc left */ @@ -965,21 +956,16 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, int head; int bytes, pkts; - NETIF_TX_LOCK(ring); - head = readl_relaxed(ring->io_base + RCB_REG_HEAD); rmb(); /* make sure head is ready before touch any data */ - if (is_ring_empty(ring) || head == ring->next_to_clean) { - NETIF_TX_UNLOCK(ring); + if (is_ring_empty(ring) || head == ring->next_to_clean) return 0; /* no data to poll */ - } if (!is_valid_clean_head(ring, head)) { netdev_err(ndev, "wrong head (%d, %d-%d)\n", head, ring->next_to_use, ring->next_to_clean); ring->stats.io_err_cnt++; - NETIF_TX_UNLOCK(ring); return -EIO; } @@ -994,8 +980,6 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, ring->stats.tx_pkts += pkts; ring->stats.tx_bytes += bytes; - NETIF_TX_UNLOCK(ring); - dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index); netdev_tx_completed_queue(dev_queue, pkts, bytes); @@ -1055,16 +1039,12 @@ static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data) int head; int bytes, pkts; - NETIF_TX_LOCK(ring); - head = ring->next_to_use; /* ntu :soft setted ring position*/ bytes = 0; pkts = 0; while (head != ring->next_to_clean) hns_nic_reclaim_one_desc(ring, &bytes, &pkts); - NETIF_TX_UNLOCK(ring); - dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index); netdev_tx_reset_queue(dev_queue); } -- cgit From 1bef4c223b8588cf50433bdc2c6953d82949b3b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 09:26:19 -0800 Subject: ipv6: fixes rt6_probe() and fib6_nh->last_probe init While looking at a syzbot KCSAN report [1], I found multiple issues in this code : 1) fib6_nh->last_probe has an initial value of 0. While probably okay on 64bit kernels, this causes an issue on 32bit kernels since the time_after(jiffies, 0 + interval) might be false ~24 days after boot (for HZ=1000) 2) The data-race found by KCSAN I could use READ_ONCE() and WRITE_ONCE(), but we also can take the opportunity of not piling-up too many rt6_probe_deferred() works by using instead cmpxchg() so that only one cpu wins the race. [1] BUG: KCSAN: data-race in find_match / find_match write to 0xffff8880bb7aabe8 of 8 bytes by interrupt on cpu 1: rt6_probe net/ipv6/route.c:663 [inline] find_match net/ipv6/route.c:757 [inline] find_match+0x5bd/0x790 net/ipv6/route.c:733 __find_rr_leaf+0xe3/0x780 net/ipv6/route.c:831 find_rr_leaf net/ipv6/route.c:852 [inline] rt6_select net/ipv6/route.c:896 [inline] fib6_table_lookup+0x383/0x650 net/ipv6/route.c:2164 ip6_pol_route+0xee/0x5c0 net/ipv6/route.c:2200 ip6_pol_route_output+0x48/0x60 net/ipv6/route.c:2452 fib6_rule_lookup+0x3d6/0x470 net/ipv6/fib6_rules.c:117 ip6_route_output_flags_noref+0x16b/0x230 net/ipv6/route.c:2484 ip6_route_output_flags+0x50/0x1a0 net/ipv6/route.c:2497 ip6_dst_lookup_tail+0x25d/0xc30 net/ipv6/ip6_output.c:1049 ip6_dst_lookup_flow+0x68/0x120 net/ipv6/ip6_output.c:1150 inet6_csk_route_socket+0x2f7/0x420 net/ipv6/inet6_connection_sock.c:106 inet6_csk_xmit+0x91/0x1f0 net/ipv6/inet6_connection_sock.c:121 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] tcp_xmit_probe_skb+0x19b/0x1d0 net/ipv4/tcp_output.c:3735 read to 0xffff8880bb7aabe8 of 8 bytes by interrupt on cpu 0: rt6_probe net/ipv6/route.c:657 [inline] find_match net/ipv6/route.c:757 [inline] find_match+0x521/0x790 net/ipv6/route.c:733 __find_rr_leaf+0xe3/0x780 net/ipv6/route.c:831 find_rr_leaf net/ipv6/route.c:852 [inline] rt6_select net/ipv6/route.c:896 [inline] fib6_table_lookup+0x383/0x650 net/ipv6/route.c:2164 ip6_pol_route+0xee/0x5c0 net/ipv6/route.c:2200 ip6_pol_route_output+0x48/0x60 net/ipv6/route.c:2452 fib6_rule_lookup+0x3d6/0x470 net/ipv6/fib6_rules.c:117 ip6_route_output_flags_noref+0x16b/0x230 net/ipv6/route.c:2484 ip6_route_output_flags+0x50/0x1a0 net/ipv6/route.c:2497 ip6_dst_lookup_tail+0x25d/0xc30 net/ipv6/ip6_output.c:1049 ip6_dst_lookup_flow+0x68/0x120 net/ipv6/ip6_output.c:1150 inet6_csk_route_socket+0x2f7/0x420 net/ipv6/inet6_connection_sock.c:106 inet6_csk_xmit+0x91/0x1f0 net/ipv6/inet6_connection_sock.c:121 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 18894 Comm: udevd Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: cc3a86c802f0 ("ipv6: Change rt6_probe to take a fib6_nh") Fixes: f547fac624be ("ipv6: rate-limit probes for neighbourless routes") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a63ff85fe141..e60bf8e7dd1a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -621,6 +621,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) { struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; + unsigned long last_probe; struct neighbour *neigh; struct net_device *dev; struct inet6_dev *idev; @@ -639,6 +640,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh) nh_gw = &fib6_nh->fib_nh_gw6; dev = fib6_nh->fib_nh_dev; rcu_read_lock_bh(); + last_probe = READ_ONCE(fib6_nh->last_probe); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { @@ -654,13 +656,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else if (time_after(jiffies, fib6_nh->last_probe + + } else if (time_after(jiffies, last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } - if (work) { - fib6_nh->last_probe = jiffies; + if (!work || cmpxchg(&fib6_nh->last_probe, + last_probe, jiffies) != last_probe) { + kfree(work); + } else { INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); @@ -3383,6 +3387,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, int err; fib6_nh->fib_nh_family = AF_INET6; +#ifdef CONFIG_IPV6_ROUTER_PREF + fib6_nh->last_probe = jiffies; +#endif err = -ENODEV; if (cfg->fc_ifindex) { -- cgit From e0a312629fefa943534fc46f7bfbe6de3fdaf463 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 7 Nov 2019 18:29:52 +0000 Subject: ipv4: Fix table id reference in fib_sync_down_addr Hendrik reported routes in the main table using source address are not removed when the address is removed. The problem is that fib_sync_down_addr does not account for devices in the default VRF which are associated with the main table. Fix by updating the table id reference. Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Reported-by: Hendrik Donner Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0913a090b2bf..f1888c683426 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1814,8 +1814,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; + int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); - int tb_id = l3mdev_fib_table(dev); struct fib_info *fi; if (!fib_info_laddrhash || local == 0) -- cgit From 6dd47d9754ff0589715054b11294771f2c9a16ac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Nov 2019 15:41:11 +0100 Subject: mac80211: fix ieee80211_txq_setup_flows() failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ieee80211_txq_setup_flows() fails, we don't clean up LED state properly, leading to crashes later on, fix that. Fixes: dc8b274f0952 ("mac80211: Move up init of TXQs") Signed-off-by: Johannes Berg Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20191105154110.1ccf7112ba5d.I0ba865792446d051867b33153be65ce6b063d98c@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index aba094b4ccfc..2d05c4cfaf6d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1292,8 +1292,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); - ieee80211_led_exit(local); fail_flows: + ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); -- cgit From 71e67c3bd127cfe7863f54e4b087eba1cc8f9a7a Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 5 Nov 2019 16:57:50 +0100 Subject: net/fq_impl: Switch to kvmalloc() for memory allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FQ implementation used by mac80211 allocates memory using kmalloc(), which can fail; and Johannes reported that this actually happens in practice. To avoid this, switch the allocation to kvmalloc() instead; this also brings fq_impl in line with all the FQ qdiscs. Fixes: 557fc4a09803 ("fq: add fair queuing framework") Reported-by: Johannes Berg Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20191105155750.547379-1-toke@redhat.com Signed-off-by: Johannes Berg --- include/net/fq_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 107c0d700ed6..38a9a3d1222b 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -313,7 +313,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ - fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); + fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) return -ENOMEM; @@ -331,7 +331,7 @@ static void fq_reset(struct fq *fq, for (i = 0; i < fq->flows_cnt; i++) fq_flow_reset(fq, &fq->flows[i], free_func); - kfree(fq->flows); + kvfree(fq->flows); fq->flows = NULL; } -- cgit From 285531f9e6774e3be71da6673d475ff1a088d675 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 31 Oct 2019 06:12:43 -0600 Subject: mac80211: fix station inactive_time shortly after boot In the first 5 minutes after boot (time of INITIAL_JIFFIES), ieee80211_sta_last_active() returns zero if last_ack is zero. This leads to "inactive time" showing jiffies_to_msecs(jiffies). # iw wlan0 station get fc:ec:da:64:a6:dd Station fc:ec:da:64:a6:dd (on wlan0) inactive time: 4294894049 ms . . connected time: 70 seconds Fix by returning last_rx if last_ack == 0. Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20191031121243.27694-1-anzaki@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index bd11fef2139f..8d3a2389b055 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2457,7 +2457,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->status_stats.last_ack || + time_after(stats->last_rx, sta->status_stats.last_ack)) return stats->last_rx; return sta->status_stats.last_ack; } -- cgit From e4dd5608033efe7b6030cde359bfdbaeb73bc22d Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 8 Nov 2019 10:00:44 +0000 Subject: net: ethernet: octeon_mgmt: Account for second possible VLAN header Octeon's input ring-buffer entry has 14 bits-wide size field, so to account for second possible VLAN header max_mtu must be further reduced. Fixes: 109cc16526c6d ("ethernet/cavium: use core min/max MTU checking") Signed-off-by: Alexander Sverdlin Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 0e5de88fd6e8..cdd7e5da4a74 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1499,7 +1499,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->ethtool_ops = &octeon_mgmt_ethtool_ops; netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; - netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM; + netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; mac = of_get_mac_address(pdev->dev.of_node); -- cgit From ad8a7220355d39cddce8eac1cea9677333e8b821 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 8 Nov 2019 17:08:50 +0100 Subject: vsock/virtio: fix sock refcnt holding during the shutdown The "42f5cda5eaf4" commit rightly set SOCK_DONE on peer shutdown, but there is an issue if we receive the SHUTDOWN(RDWR) while the virtio_transport_close_timeout() is scheduled. In this case, when the timeout fires, the SOCK_DONE is already set and the virtio_transport_close_timeout() will not call virtio_transport_reset() and virtio_transport_do_close(). This causes that both sockets remain open and will never be released, preventing the unloading of [virtio|vhost]_transport modules. This patch fixes this issue, calling virtio_transport_reset() and virtio_transport_do_close() when we receive the SHUTDOWN(RDWR) and there is nothing left to read. Fixes: 42f5cda5eaf4 ("vsock/virtio: set SOCK_DONE on peer shutdown") Cc: Stephen Barber Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 481f7f8a1655..fb2060dffb0a 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -947,9 +947,11 @@ virtio_transport_recv_connected(struct sock *sk, if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) vsk->peer_shutdown |= SEND_SHUTDOWN; if (vsk->peer_shutdown == SHUTDOWN_MASK && - vsock_stream_has_data(vsk) <= 0) { - sock_set_flag(sk, SOCK_DONE); - sk->sk_state = TCP_CLOSING; + vsock_stream_has_data(vsk) <= 0 && + !sock_flag(sk, SOCK_DONE)) { + (void)virtio_transport_reset(vsk, NULL); + + virtio_transport_do_close(vsk, true); } if (le32_to_cpu(pkt->hdr.flags)) sk->sk_state_change(sk); -- cgit From 1b53d64435d56902fc234ff2507142d971a09687 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 20:08:19 -0800 Subject: net: fix data-race in neigh_event_send() KCSAN reported the following data-race [1] The fix will also prevent the compiler from optimizing out the condition. [1] BUG: KCSAN: data-race in neigh_resolve_output / neigh_resolve_output write to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 1: neigh_event_send include/net/neighbour.h:443 [inline] neigh_resolve_output+0x78/0x480 net/core/neighbour.c:1474 neigh_output include/net/neighbour.h:511 [inline] ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_output+0xdf/0x210 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:436 [inline] ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:237 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:618 read to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 0: neigh_event_send include/net/neighbour.h:442 [inline] neigh_resolve_output+0x57/0x480 net/core/neighbour.c:1474 neigh_output include/net/neighbour.h:511 [inline] ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:294 [inline] ip_output+0xdf/0x210 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:436 [inline] ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532 ip_queue_xmit+0x45/0x60 include/net/ip.h:237 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline] __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/neighbour.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 50a67bd6a434..b8452cc0e059 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -439,8 +439,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; - if (neigh->used != now) - neigh->used = now; + if (READ_ONCE(neigh->used) != now) + WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; -- cgit From deabc87111c690097c03765ea017cd500f7376fc Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 8 Nov 2019 02:42:30 -0800 Subject: qede: fix NULL pointer deref in __qede_remove() While rebooting the system with SR-IOV vfs enabled leads to below crash due to recurrence of __qede_remove() on the VF devices (first from .shutdown() flow of the VF itself and another from PF's .shutdown() flow executing pci_disable_sriov()) This patch adds a safeguard in __qede_remove() flow to fix this, so that driver doesn't attempt to remove "already removed" devices. [ 194.360134] BUG: unable to handle kernel NULL pointer dereference at 00000000000008dc [ 194.360227] IP: [] __qede_remove+0x24/0x130 [qede] [ 194.360304] PGD 0 [ 194.360325] Oops: 0000 [#1] SMP [ 194.360360] Modules linked in: tcp_lp fuse tun bridge stp llc devlink bonding ip_set nfnetlink ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi dell_smbios iTCO_wdt iTCO_vendor_support dell_wmi_descriptor dcdbas vfat fat pcc_cpufreq skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd qedr ib_core pcspkr ses enclosure joydev ipmi_ssif sg i2c_i801 lpc_ich mei_me mei wmi ipmi_si ipmi_devintf ipmi_msghandler tpm_crb acpi_pad acpi_power_meter xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32c_intel mgag200 [ 194.361044] qede i2c_algo_bit drm_kms_helper qed syscopyarea sysfillrect nvme sysimgblt fb_sys_fops ttm nvme_core mpt3sas crc8 ptp drm pps_core ahci raid_class scsi_transport_sas libahci libata drm_panel_orientation_quirks nfit libnvdimm dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ip_tables] [ 194.361297] CPU: 51 PID: 7996 Comm: reboot Kdump: loaded Not tainted 3.10.0-1062.el7.x86_64 #1 [ 194.361359] Hardware name: Dell Inc. PowerEdge MX840c/0740HW, BIOS 2.4.6 10/15/2019 [ 194.361412] task: ffff9cea9b360000 ti: ffff9ceabebdc000 task.ti: ffff9ceabebdc000 [ 194.361463] RIP: 0010:[] [] __qede_remove+0x24/0x130 [qede] [ 194.361534] RSP: 0018:ffff9ceabebdfac0 EFLAGS: 00010282 [ 194.361570] RAX: 0000000000000000 RBX: ffff9cd013846098 RCX: 0000000000000000 [ 194.361621] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9cd013846098 [ 194.361668] RBP: ffff9ceabebdfae8 R08: 0000000000000000 R09: 0000000000000000 [ 194.361715] R10: 00000000bfe14201 R11: ffff9ceabfe141e0 R12: 0000000000000000 [ 194.361762] R13: ffff9cd013846098 R14: 0000000000000000 R15: ffff9ceab5e48000 [ 194.361810] FS: 00007f799c02d880(0000) GS:ffff9ceacb0c0000(0000) knlGS:0000000000000000 [ 194.361865] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 194.361903] CR2: 00000000000008dc CR3: 0000001bdac76000 CR4: 00000000007607e0 [ 194.361953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 194.362002] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 194.362051] PKRU: 55555554 [ 194.362073] Call Trace: [ 194.362109] [] qede_remove+0x10/0x20 [qede] [ 194.362180] [] pci_device_remove+0x3e/0xc0 [ 194.362240] [] __device_release_driver+0x82/0xf0 [ 194.362285] [] device_release_driver+0x23/0x30 [ 194.362343] [] pci_stop_bus_device+0x84/0xa0 [ 194.362388] [] pci_stop_and_remove_bus_device+0x12/0x20 [ 194.362450] [] pci_iov_remove_virtfn+0xaf/0x160 [ 194.362496] [] sriov_disable+0x3c/0xf0 [ 194.362534] [] pci_disable_sriov+0x23/0x30 [ 194.362599] [] qed_sriov_disable+0x5e3/0x650 [qed] [ 194.362658] [] ? kfree+0x106/0x140 [ 194.362709] [] ? qed_free_stream_mem+0x70/0x90 [qed] [ 194.362754] [] ? kfree+0x106/0x140 [ 194.362803] [] qed_slowpath_stop+0x1a9/0x1d0 [qed] [ 194.362854] [] __qede_remove+0xae/0x130 [qede] [ 194.362904] [] qede_shutdown+0x10/0x20 [qede] [ 194.362956] [] pci_device_shutdown+0x3a/0x60 [ 194.363010] [] device_shutdown+0xfb/0x1f0 [ 194.363066] [] kernel_restart_prepare+0x36/0x40 [ 194.363107] [] kernel_restart+0x12/0x60 [ 194.363146] [] SYSC_reboot+0x229/0x260 [ 194.363196] [] ? handle_mm_fault+0x39d/0x9b0 [ 194.363253] [] ? __switch_to+0x151/0x580 [ 194.363304] [] ? __schedule+0x448/0x9c0 [ 194.363343] [] SyS_reboot+0xe/0x10 [ 194.363387] [] system_call_fastpath+0x25/0x2a [ 194.363430] Code: f9 e9 37 ff ff ff 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 4c 8d af 98 00 00 00 41 54 4c 89 ef 41 89 f4 53 e8 4c e4 55 f9 <80> b8 dc 08 00 00 01 48 89 c3 4c 8d b8 c0 08 00 00 4c 8b b0 c0 [ 194.363712] RIP [] __qede_remove+0x24/0x130 [qede] [ 194.363764] RSP [ 194.363791] CR2: 00000000000008dc Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: Sudarsana Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 8d1c208f778f..a220cc7c947a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1208,8 +1208,16 @@ enum qede_remove_mode { static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) { struct net_device *ndev = pci_get_drvdata(pdev); - struct qede_dev *edev = netdev_priv(ndev); - struct qed_dev *cdev = edev->cdev; + struct qede_dev *edev; + struct qed_dev *cdev; + + if (!ndev) { + dev_info(&pdev->dev, "Device has already been removed\n"); + return; + } + + edev = netdev_priv(ndev); + cdev = edev->cdev; DP_INFO(edev, "Starting qede_remove\n"); -- cgit From 615457a226f042bffc3a1532afb244cab37460d4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 1 Nov 2019 14:00:17 +0000 Subject: ice: fix potential infinite loop because loop counter being too small Currently the for-loop counter i is a u8 however it is being checked against a maximum value hw->num_tx_sched_layers which is a u16. Hence there is a potential wrap-around of counter i back to zero if hw->num_tx_sched_layers is greater than 255. Fix this by making i a u16. Addresses-Coverity: ("Infinite loop") Fixes: b36c598c999c ("ice: Updates to Tx scheduler code") Signed-off-by: Colin Ian King Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index fc624b73d05d..2fde9653a608 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1036,7 +1036,7 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) struct ice_aqc_query_txsched_res_resp *buf; enum ice_status status = 0; __le16 max_sibl; - u8 i; + u16 i; if (hw->layer_info) return status; -- cgit From 4eda4e0096842764d725bcfd77471a419832b074 Mon Sep 17 00:00:00 2001 From: Nicholas Nunley Date: Tue, 5 Nov 2019 04:22:14 -0800 Subject: iavf: initialize ITRN registers with correct values Since commit 92418fb14750 ("i40e/i40evf: Use usec value instead of reg value for ITR defines") the driver tracks the interrupt throttling intervals in single usec units, although the actual ITRN registers are programmed in 2 usec units. Most register programming flows in the driver correctly handle the conversion, although it is currently not applied when the registers are initialized to their default values. Most of the time this doesn't present a problem since the default values are usually immediately overwritten through the standard adaptive throttling mechanism, or updated manually by the user, but if adaptive throttling is disabled and the interval values are left alone then the incorrect value will persist. Since the intended default interval of 50 usecs (vs. 100 usecs as programmed) performs better for most traffic workloads, this can lead to performance regressions. This patch adds the correct conversion when writing the initial values to the ITRN registers. Signed-off-by: Nicholas Nunley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8f310e520b06..821987da5698 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -314,7 +314,7 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector->ring_mask |= BIT(r_idx); wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx), - q_vector->rx.current_itr); + q_vector->rx.current_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; } @@ -340,7 +340,7 @@ iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); q_vector->num_ringpairs++; wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx), - q_vector->tx.target_itr); + q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; } -- cgit From 4c9da6f2b8a029052c75bd4a61ae229135831177 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 6 Nov 2019 06:24:04 -0800 Subject: i40e: Fix for ethtool -m issue on X722 NIC This patch contains fix for a problem with command: 'ethtool -m ' which breaks functionality of: 'ethtool ' when called on X722 NIC Disallowed update of link phy_types on X722 NIC Currently correct value cannot be obtained from FW Previously wrong value returned by FW was used and was a root cause for incorrect output of 'ethtool ' command Signed-off-by: Arkadiusz Kubalewski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index d37c6e0e5f08..7560f06768e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1876,7 +1876,8 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE && + hw->mac.type != I40E_MAC_X722) { __le32 tmp; memcpy(&tmp, resp->link_type, sizeof(tmp)); -- cgit From 6acab13bdf2adda250b2a4d648fd4b49f1d0f14c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 6 Nov 2019 09:18:23 -0800 Subject: igb/igc: use ktime accessors for skb->tstamp When implementing launch time support in the igb and igc drivers, the skb->tstamp value is assumed to be a s64, but it's declared as a ktime_t value. Although ktime_t is typedef'd to s64 it wasn't always, and the kernel provides accessors for ktime_t values. Use the ktime_to_timespec64 and ktime_set accessors instead of directly assuming that the variable is always an s64. This improves portability if the code is ever moved to another kernel version, or if the definition of ktime_t ever changes again in the future. Signed-off-by: Jacob Keller Acked-by: Vinicius Costa Gomes Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 2 ++ drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- drivers/net/ethernet/intel/igc/igc_main.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 530613f31527..69a2daaca5c5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -20,6 +20,8 @@ /* API version 1.7 implements additional link and PHY-specific APIs */ #define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 +/* API version 1.9 for X722 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009 /* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */ #define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9148c62d9ac5..ed7e667d7eb2 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5675,8 +5675,8 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, * should have been handled by the upper layers. */ if (tx_ring->launchtime_enable) { - ts = ns_to_timespec64(first->skb->tstamp); - first->skb->tstamp = 0; + ts = ktime_to_timespec64(first->skb->tstamp); + first->skb->tstamp = ktime_set(0, 0); context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8e424dfab12e..24888676f69b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -824,8 +824,8 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, * should have been handled by the upper layers. */ if (tx_ring->launchtime_enable) { - ts = ns_to_timespec64(first->skb->tstamp); - first->skb->tstamp = 0; + ts = ktime_to_timespec64(first->skb->tstamp); + first->skb->tstamp = ktime_set(0, 0); context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->launch_time = 0; -- cgit From 705639572e8c096b12fcbff64a5c67db5b2d6ac0 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 8 Nov 2019 20:58:09 +0100 Subject: i40e: need_wakeup flag might not be set for Tx The need_wakeup flag for Tx might not be set for AF_XDP sockets that are only used to send packets. This happens if there is at least one outstanding packet that has not been completed by the hardware and we get that corresponding completion (which will not generate an interrupt since interrupts are disabled in the napi poll loop) between the time we stopped processing the Tx completions and interrupts are enabled again. In this case, the need_wakeup flag will have been cleared at the end of the Tx completion processing as we believe we will get an interrupt from the outstanding completion at a later point in time. But if this completion interrupt occurs before interrupts are enable, we lose it and should at that point really have set the need_wakeup flag since there are no more outstanding completions that can generate an interrupt to continue the processing. When this happens, user space will see a Tx queue need_wakeup of 0 and skip issuing a syscall, which means will never get into the Tx processing again and we have a deadlock. This patch introduces a quick fix for this issue by just setting the need_wakeup flag for Tx to 1 all the time. I am working on a proper fix for this that will toggle the flag appropriately, but it is more challenging than I anticipated and I am afraid that this patch will not be completed before the merge window closes, therefore this easier fix for now. This fix has a negative performance impact in the range of 0% to 4%. Towards the higher end of the scale if you have driver and application on the same core and issue a lot of packets, and towards no negative impact if you use two cores, lower transmission speeds and/or a workload that also receives packets. Signed-off-by: Magnus Karlsson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index a05dfecdd9b4..d07e1a890428 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -689,8 +689,6 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); - if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) - xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -769,12 +767,8 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, i40e_update_tx_stats(tx_ring, completed_frames, total_bytes); out_xmit: - if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { - if (tx_ring->next_to_clean == tx_ring->next_to_use) - xsk_set_tx_need_wakeup(tx_ring->xsk_umem); - else - xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); - } + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); xmit_done = i40e_xmit_zc(tx_ring, budget); -- cgit From 0843aa8f12edbd60e64e71f854eab2f452010eaa Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 8 Nov 2019 20:58:10 +0100 Subject: ixgbe: need_wakeup flag might not be set for Tx The need_wakeup flag for Tx might not be set for AF_XDP sockets that are only used to send packets. This happens if there is at least one outstanding packet that has not been completed by the hardware and we get that corresponding completion (which will not generate an interrupt since interrupts are disabled in the napi poll loop) between the time we stopped processing the Tx completions and interrupts are enabled again. In this case, the need_wakeup flag will have been cleared at the end of the Tx completion processing as we believe we will get an interrupt from the outstanding completion at a later point in time. But if this completion interrupt occurs before interrupts are enable, we lose it and should at that point really have set the need_wakeup flag since there are no more outstanding completions that can generate an interrupt to continue the processing. When this happens, user space will see a Tx queue need_wakeup of 0 and skip issuing a syscall, which means will never get into the Tx processing again and we have a deadlock. This patch introduces a quick fix for this issue by just setting the need_wakeup flag for Tx to 1 all the time. I am working on a proper fix for this that will toggle the flag appropriately, but it is more challenging than I anticipated and I am afraid that this patch will not be completed before the merge window closes, therefore this easier fix for now. This fix has a negative performance impact in the range of 0% to 4%. Towards the higher end of the scale if you have driver and application on the same core and issue a lot of packets, and towards no negative impact if you use two cores, lower transmission speeds and/or a workload that also receives packets. Signed-off-by: Magnus Karlsson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 100ac89b345d..d6feaacfbf89 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -622,8 +622,6 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) if (tx_desc) { ixgbe_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); - if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) - xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -691,12 +689,8 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); - if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { - if (tx_ring->next_to_clean == tx_ring->next_to_use) - xsk_set_tx_need_wakeup(tx_ring->xsk_umem); - else - xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); - } + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); } -- cgit