From 7d32e69310d67e6b04af04f26193f79dfc2f05c7 Mon Sep 17 00:00:00 2001 From: Slava Bacherikov Date: Thu, 2 Apr 2020 23:41:39 +0300 Subject: kbuild, btf: Fix dependencies for DEBUG_INFO_BTF Currently turning on DEBUG_INFO_SPLIT when DEBUG_INFO_BTF is also enabled will produce invalid btf file, since gen_btf function in link-vmlinux.sh script doesn't handle *.dwo files. Enabling DEBUG_INFO_REDUCED will also produce invalid btf file, and using GCC_PLUGIN_RANDSTRUCT with BTF makes no sense. Fixes: e83b9f55448a ("kbuild: add ability to generate BTF type info for vmlinux") Reported-by: Jann Horn Reported-by: Liu Yiding Signed-off-by: Slava Bacherikov Signed-off-by: Daniel Borkmann Reviewed-by: Kees Cook Acked-by: KP Singh Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200402204138.408021-1-slava@bacher09.org --- lib/Kconfig.debug | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a85a6a423bf4..4cb4671b1d9e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -241,6 +241,8 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_BTF bool "Generate BTF typeinfo" depends on DEBUG_INFO + depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED + depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST help Generate deduplicated BTF type information from DWARF debug info. Turning this on expects presence of pahole tool, which will convert -- cgit From 250e778fe1635b237d9f52c5d9df202cf23413d6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Mar 2020 11:00:30 +0100 Subject: bpf: Fix spelling mistake "arithmatic" -> "arithmetic" in test_verifier There are a couple of spelling mistakes in two literal strings, fix them. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200331100030.41372-1-colin.king@canonical.com --- tools/testing/selftests/bpf/verifier/bounds.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 4d0d09574bf4..a253a064e6e0 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -501,7 +501,7 @@ .result = REJECT }, { - "bounds check mixed 32bit and 64bit arithmatic. test1", + "bounds check mixed 32bit and 64bit arithmetic. test1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_1, -1), @@ -520,7 +520,7 @@ .result = ACCEPT }, { - "bounds check mixed 32bit and 64bit arithmatic. test2", + "bounds check mixed 32bit and 64bit arithmetic. test2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_1, -1), -- cgit From 93bbb2555b65e582be9daebb752e1b8e7380da20 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 31 Mar 2020 12:10:46 +0200 Subject: riscv, bpf: Remove BPF JIT for nommu builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BPF JIT fails to build for kernels configured to !MMU. Without an MMU, the BPF JIT does not make much sense, therefore this patch disables the JIT for nommu builds. This was reported by the kbuild test robot: All errors (new ones prefixed by >>): arch/riscv/net/bpf_jit_comp64.c: In function 'bpf_jit_alloc_exec': >> arch/riscv/net/bpf_jit_comp64.c:1094:47: error: 'BPF_JIT_REGION_START' undeclared (first use in this function) 1094 | return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, | ^~~~~~~~~~~~~~~~~~~~ arch/riscv/net/bpf_jit_comp64.c:1094:47: note: each undeclared identifier is reported only once for each function it appears in >> arch/riscv/net/bpf_jit_comp64.c:1095:9: error: 'BPF_JIT_REGION_END' undeclared (first use in this function) 1095 | BPF_JIT_REGION_END, GFP_KERNEL, | ^~~~~~~~~~~~~~~~~~ arch/riscv/net/bpf_jit_comp64.c:1098:1: warning: control reaches end of non-void function [-Wreturn-type] 1098 | } | ^ Reported-by: kbuild test robot Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Luke Nelson Link: https://lore.kernel.org/bpf/20200331101046.23252-1-bjorn.topel@gmail.com --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8672e77a5b7a..bd35ac72fe24 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -55,7 +55,7 @@ config RISCV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB select ARCH_HAS_DEBUG_VIRTUAL - select HAVE_EBPF_JIT + select HAVE_EBPF_JIT if MMU select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE select ARCH_WANT_HUGE_PMD_SHARE if 64BIT -- cgit From 7a1ca97269ee197ea967de2c9412d8e7e2274ee6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 2 Apr 2020 14:55:24 +0200 Subject: net, sk_msg: Don't use RCU_INIT_POINTER on sk_user_data sparse reports an error due to use of RCU_INIT_POINTER helper to assign to sk_user_data pointer, which is not tagged with __rcu: net/core/sock.c:1875:25: error: incompatible types in comparison expression (different address spaces): net/core/sock.c:1875:25: void [noderef] * net/core/sock.c:1875:25: void * ... and rightfully so. sk_user_data is not always treated as a pointer to an RCU-protected data. When it is used to point at an RCU-protected object, we access it with __sk_user_data to inform sparse about it. In this case, when the child socket does not inherit sk_user_data from the parent, there is no reason to treat it as an RCU-protected pointer. Use a regular assignment to clear the pointer value. Fixes: f1ff5ce2cd5e ("net, sk_msg: Clear sk_user_data pointer on clone if tagged") Reported-by: kbuild test robot Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200402125524.851439-1-jakub@cloudflare.com --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index da32d9b6d09f..0510826bf860 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1872,7 +1872,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * as not suitable for copying when cloning. */ if (sk_user_data_is_nocopy(newsk)) - RCU_INIT_POINTER(newsk->sk_user_data, NULL); + newsk->sk_user_data = NULL; newsk->sk_err = 0; newsk->sk_err_soft = 0; -- cgit From 5222d69642a09261222fb9703761a029db16cadf Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 2 Apr 2020 22:07:51 +0200 Subject: bpf, lsm: Fix the file_mprotect LSM test. The test was previously using an mprotect on the heap memory allocated using malloc and was expecting the allocation to be always using sbrk(2). This is, however, not always true and in certain conditions malloc may end up using anonymous mmaps for heap alloctions. This means that the following condition that is used in the "lsm/file_mprotect" program is not sufficent to detect all mprotect calls done on heap memory: is_heap = (vma->vm_start >= vma->vm_mm->start_brk && vma->vm_end <= vma->vm_mm->brk); The test is updated to use an mprotect on memory allocated on the stack. While this would result in the splitting of the vma, this happens only after the security_file_mprotect hook. So, the condition used in the BPF program holds true. Fixes: 03e54f100d57 ("bpf: lsm: Add selftests for BPF_PROG_TYPE_LSM") Reported-by: Alexei Starovoitov Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200402200751.26372-1-kpsingh@chromium.org --- tools/testing/selftests/bpf/prog_tests/test_lsm.c | 18 +++++++++--------- tools/testing/selftests/bpf/progs/lsm.c | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 1e4c258de09d..b17eb2045c1d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -15,7 +15,10 @@ char *CMD_ARGS[] = {"true", NULL}; -int heap_mprotect(void) +#define GET_PAGE_ADDR(ADDR, PAGE_SIZE) \ + (char *)(((unsigned long) (ADDR + PAGE_SIZE)) & ~(PAGE_SIZE-1)) + +int stack_mprotect(void) { void *buf; long sz; @@ -25,12 +28,9 @@ int heap_mprotect(void) if (sz < 0) return sz; - buf = memalign(sz, 2 * sz); - if (buf == NULL) - return -ENOMEM; - - ret = mprotect(buf, sz, PROT_READ | PROT_WRITE | PROT_EXEC); - free(buf); + buf = alloca(sz * 3); + ret = mprotect(GET_PAGE_ADDR(buf, sz), sz, + PROT_READ | PROT_WRITE | PROT_EXEC); return ret; } @@ -73,8 +73,8 @@ void test_test_lsm(void) skel->bss->monitored_pid = getpid(); - err = heap_mprotect(); - if (CHECK(errno != EPERM, "heap_mprotect", "want errno=EPERM, got %d\n", + err = stack_mprotect(); + if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n", errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index a4e3c223028d..b4598d4bc4f7 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -23,12 +23,12 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, return ret; __u32 pid = bpf_get_current_pid_tgid() >> 32; - int is_heap = 0; + int is_stack = 0; - is_heap = (vma->vm_start >= vma->vm_mm->start_brk && - vma->vm_end <= vma->vm_mm->brk); + is_stack = (vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack); - if (is_heap && monitored_pid == pid) { + if (is_stack && monitored_pid == pid) { mprotect_count++; ret = -EPERM; } -- cgit From 72239f2795fab9a58633bd0399698ff7581534a3 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 1 Apr 2020 17:14:38 +0200 Subject: netfilter: nft_set_rbtree: Drop spurious condition for overlap detection on insertion Case a1. for overlap detection in __nft_rbtree_insert() is not a valid one: start-after-start is not needed to detect any type of interval overlap and it actually results in a false positive if, while descending the tree, this is the only step we hit after starting from the root. This introduced a regression, as reported by Pablo, in Python tests cases ip/ip.t and ip/numgen.t: ip/ip.t: ERROR: line 124: add rule ip test-ip4 input ip hdrlength vmap { 0-4 : drop, 5 : accept, 6 : continue } counter: This rule should not have failed. ip/numgen.t: ERROR: line 7: add rule ip test-ip4 pre dnat to numgen inc mod 10 map { 0-5 : 192.168.10.100, 6-9 : 192.168.20.200}: This rule should not have failed. Drop case a1. and renumber others, so that they are a bit clearer. In order for these diagrams to be readily understandable, a bigger rework is probably needed, such as an ASCII art of the actual rbtree (instead of a flattened version). Shell script test sets/0044interval_overlap_0 should cover all possible cases for false negatives, so I consider that test case still sufficient after this change. v2: Fix comments for cases a3. and b3. Reported-by: Pablo Neira Ayuso Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3a5552e14f75..3ffef454d469 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -218,27 +218,26 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, /* Detect overlaps as we descend the tree. Set the flag in these cases: * - * a1. |__ _ _? >|__ _ _ (insert start after existing start) - * a2. _ _ __>| ?_ _ __| (insert end before existing end) - * a3. _ _ ___| ?_ _ _>| (insert end after existing end) - * a4. >|__ _ _ _ _ __| (insert start before existing end) + * a1. _ _ __>| ?_ _ __| (insert end before existing end) + * a2. _ _ ___| ?_ _ _>| (insert end after existing end) + * a3. _ _ ___? >|_ _ __| (insert start before existing end) * * and clear it later on, as we eventually reach the points indicated by * '?' above, in the cases described below. We'll always meet these * later, locally, due to tree ordering, and overlaps for the intervals * that are the closest together are always evaluated last. * - * b1. |__ _ _! >|__ _ _ (insert start after existing end) - * b2. _ _ __>| !_ _ __| (insert end before existing start) - * b3. !_____>| (insert end after existing start) + * b1. _ _ __>| !_ _ __| (insert end before existing start) + * b2. _ _ ___| !_ _ _>| (insert end after existing start) + * b3. _ _ ___! >|_ _ __| (insert start after existing end) * - * Case a4. resolves to b1.: + * Case a3. resolves to b3.: * - if the inserted start element is the leftmost, because the '0' * element in the tree serves as end element * - otherwise, if an existing end is found. Note that end elements are * always inserted after corresponding start elements. * - * For a new, rightmost pair of elements, we'll hit cases b1. and b3., + * For a new, rightmost pair of elements, we'll hit cases b3. and b2., * in that order. * * The flag is also cleared in two special cases: @@ -262,9 +261,9 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, p = &parent->rb_left; if (nft_rbtree_interval_start(new)) { - overlap = nft_rbtree_interval_start(rbe) && - nft_set_elem_active(&rbe->ext, - genmask); + if (nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, genmask)) + overlap = false; } else { overlap = nft_rbtree_interval_end(rbe) && nft_set_elem_active(&rbe->ext, -- cgit From a26c1e49c8e97922edc8d7e23683384729d09f77 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 31 Mar 2020 23:02:59 +0200 Subject: netfilter: nf_tables: do not update stateful expressions if lookup is inverted Initialize set lookup matching element to NULL. Otherwise, the NFT_LOOKUP_F_INV flag reverses the matching logic and it leads to deference an uninitialized pointer to the matching element. Make sure element data area and stateful expression are accessed if there is a matching set element. This patch undoes 24791b9aa1ab ("netfilter: nft_set_bitmap: initialize set element extension in lookups") which is not required anymore. Fixes: 339706bc21c1 ("netfilter: nft_lookup: update element stateful expression") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nft_lookup.c | 12 +++++++----- net/netfilter/nft_set_bitmap.c | 1 - 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6eb627b3c99b..4ff7c81e6717 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -901,7 +901,7 @@ static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, { struct nft_expr *expr; - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { + if (__nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) { expr = nft_set_ext_expr(ext); expr->ops->eval(expr, regs, pkt); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 1e70359d633c..f1363b8aabba 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -29,7 +29,7 @@ void nft_lookup_eval(const struct nft_expr *expr, { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; - const struct nft_set_ext *ext; + const struct nft_set_ext *ext = NULL; bool found; found = set->ops->lookup(nft_net(pkt), set, ®s->data[priv->sreg], @@ -39,11 +39,13 @@ void nft_lookup_eval(const struct nft_expr *expr, return; } - if (set->flags & NFT_SET_MAP) - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), set->dlen); + if (ext) { + if (set->flags & NFT_SET_MAP) + nft_data_copy(®s->data[priv->dreg], + nft_set_ext_data(ext), set->dlen); - nft_set_elem_update_expr(ext, regs, pkt); + nft_set_elem_update_expr(ext, regs, pkt); + } } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 32f0fc8be3a4..2a81ea421819 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -81,7 +81,6 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, u32 idx, off; nft_bitmap_location(set, key, &idx, &off); - *ext = NULL; return nft_bitmap_active(priv->bitmap, idx, off, genmask); } -- cgit From bc9fe6143de5df8fb36cf1532b48fecf35868571 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Tue, 31 Mar 2020 09:35:59 -0700 Subject: netfilter: xt_IDLETIMER: target v1 - match Android layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Android has long had an extension to IDLETIMER to send netlink messages to userspace, see: https://android.googlesource.com/kernel/common/+/refs/heads/android-mainline/include/uapi/linux/netfilter/xt_IDLETIMER.h#42 Note: this is idletimer target rev 1, there is no rev 0 in the Android common kernel sources, see registration at: https://android.googlesource.com/kernel/common/+/refs/heads/android-mainline/net/netfilter/xt_IDLETIMER.c#483 When we compare that to upstream's new idletimer target rev 1: https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git/tree/include/uapi/linux/netfilter/xt_IDLETIMER.h#n46 We immediately notice that these two rev 1 structs are the same size and layout, and that while timer_type and send_nl_msg are differently named and serve a different purpose, they're at the same offset. This makes them impossible to tell apart - and thus one cannot know in a mixed Android/vanilla environment whether one means timer_type or send_nl_msg. Since this is iptables/netfilter uapi it introduces a problem between iptables (vanilla vs Android) userspace and kernel (vanilla vs Android) if the two don't match each other. Additionally when at some point in the future Android picks up 5.7+ it's not at all clear how to resolve the resulting merge conflict. Furthermore, since upgrading the kernel on old Android phones is pretty much impossible there does not seem to be an easy way out of this predicament. The only thing I've been able to come up with is some super disgusting kernel version >= 5.7 check in the iptables binary to flip between different struct layouts. By adding a dummy field to the vanilla Linux kernel header file we can force the two structs to be compatible with each other. Long term I think I would like to deprecate send_nl_msg out of Android entirely, but I haven't quite been able to figure out exactly how we depend on it. It seems to be very similar to sysfs notifications but with some extra info. Currently it's actually always enabled whenever Android uses the IDLETIMER target, so we could also probably entirely remove it from the uapi in favour of just always enabling it, but again we can't upgrade old kernels already in the field. (Also note that this doesn't change the structure's size, as it is simply fitting into the pre-existing padding, and that since 5.7 hasn't been released yet, there's still time to make this uapi visible change) Cc: Manoj Basapathi Cc: Subash Abhinov Kasiviswanathan Signed-off-by: Maciej Żenczykowski Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_IDLETIMER.h | 1 + net/netfilter/xt_IDLETIMER.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 434e6506abaa..49ddcdc61c09 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -48,6 +48,7 @@ struct idletimer_tg_info_v1 { char label[MAX_IDLETIMER_LABEL_SIZE]; + __u8 send_nl_msg; /* unused: for compatibility with Android */ __u8 timer_type; /* for kernel module internal use only */ diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 75bd0e5dd312..7b2f359bfce4 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -346,6 +346,9 @@ static int idletimer_tg_checkentry_v1(const struct xt_tgchk_param *par) pr_debug("checkentry targinfo%s\n", info->label); + if (info->send_nl_msg) + return -EOPNOTSUPP; + ret = idletimer_tg_helper((struct idletimer_tg_info *)info); if(ret < 0) { -- cgit From 7fb6f78df7003234d8df4f90aeecc432d7d0c804 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Apr 2020 10:37:16 -0700 Subject: netfilter: nf_tables: do not leave dangling pointer in nf_tables_set_alloc_name If nf_tables_set_alloc_name() frees set->name, we better clear set->name to avoid a future use-after-free or invalid-free. BUG: KASAN: double-free or invalid-free in nf_tables_newset+0x1ed6/0x2560 net/netfilter/nf_tables_api.c:4148 CPU: 0 PID: 28233 Comm: syz-executor.0 Not tainted 5.6.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x188/0x20d lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd3/0x315 mm/kasan/report.c:374 kasan_report_invalid_free+0x61/0xa0 mm/kasan/report.c:468 __kasan_slab_free+0x129/0x140 mm/kasan/common.c:455 __cache_free mm/slab.c:3426 [inline] kfree+0x109/0x2b0 mm/slab.c:3757 nf_tables_newset+0x1ed6/0x2560 net/netfilter/nf_tables_api.c:4148 nfnetlink_rcv_batch+0x83a/0x1610 net/netfilter/nfnetlink.c:433 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:543 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:561 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6b9/0x7d0 net/socket.c:2345 ___sys_sendmsg+0x100/0x170 net/socket.c:2399 __sys_sendmsg+0xec/0x1b0 net/socket.c:2432 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45c849 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe5ca21dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fe5ca21e6d4 RCX: 000000000045c849 RDX: 0000000000000000 RSI: 0000000020000c40 RDI: 0000000000000003 RBP: 000000000076bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000095b R14: 00000000004cc0e9 R15: 000000000076bf0c Allocated by task 28233: save_stack+0x1b/0x80 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:515 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:488 __do_kmalloc mm/slab.c:3656 [inline] __kmalloc_track_caller+0x159/0x790 mm/slab.c:3671 kvasprintf+0xb5/0x150 lib/kasprintf.c:25 kasprintf+0xbb/0xf0 lib/kasprintf.c:59 nf_tables_set_alloc_name net/netfilter/nf_tables_api.c:3536 [inline] nf_tables_newset+0x1543/0x2560 net/netfilter/nf_tables_api.c:4088 nfnetlink_rcv_batch+0x83a/0x1610 net/netfilter/nfnetlink.c:433 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:543 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:561 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6b9/0x7d0 net/socket.c:2345 ___sys_sendmsg+0x100/0x170 net/socket.c:2399 __sys_sendmsg+0xec/0x1b0 net/socket.c:2432 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 28233: save_stack+0x1b/0x80 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:337 [inline] __kasan_slab_free+0xf7/0x140 mm/kasan/common.c:476 __cache_free mm/slab.c:3426 [inline] kfree+0x109/0x2b0 mm/slab.c:3757 nf_tables_set_alloc_name net/netfilter/nf_tables_api.c:3544 [inline] nf_tables_newset+0x1f73/0x2560 net/netfilter/nf_tables_api.c:4088 nfnetlink_rcv_batch+0x83a/0x1610 net/netfilter/nfnetlink.c:433 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:543 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:561 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x537/0x740 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x882/0xe10 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6b9/0x7d0 net/socket.c:2345 ___sys_sendmsg+0x100/0x170 net/socket.c:2399 __sys_sendmsg+0xec/0x1b0 net/socket.c:2432 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8880a6032d00 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 0 bytes inside of 32-byte region [ffff8880a6032d00, ffff8880a6032d20) The buggy address belongs to the page: page:ffffea0002980c80 refcount:1 mapcount:0 mapping:ffff8880aa0001c0 index:0xffff8880a6032fc1 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0002a3be88 ffffea00029b1908 ffff8880aa0001c0 raw: ffff8880a6032fc1 ffff8880a6032000 000000010000003e 0000000000000000 page dumped because: kasan: bad access detected Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d0ab5ffa1e2c..f91e96d8de05 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3542,6 +3542,7 @@ cont: continue; if (!strcmp(set->name, i->name)) { kfree(set->name); + set->name = NULL; return -ENFILE; } } -- cgit From b135fc0801b671c50de103572b819bcd41603613 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sun, 16 Feb 2020 22:56:54 +0530 Subject: netfilter: ipset: Pass lockdep expression to RCU lists ip_set_type_list is traversed using list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of ip_set_type_mutex. Hence, add corresponding lockdep expression to silence false-positive warnings, and harden RCU lists. Signed-off-by: Amol Grover Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8dd17589217d..340cb955af25 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -86,7 +86,8 @@ find_set_type(const char *name, u8 family, u8 revision) { struct ip_set_type *type; - list_for_each_entry_rcu(type, &ip_set_type_list, list) + list_for_each_entry_rcu(type, &ip_set_type_list, list, + lockdep_is_held(&ip_set_type_mutex)) if (STRNCMP(type->name, name) && (type->family == family || type->family == NFPROTO_UNSPEC) && -- cgit From 5bf8e6096c7390f8f2c4d5394b5e49823adb004e Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 27 Mar 2020 14:03:07 +0100 Subject: brcmfmac: add stub for monitor interface xmit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the struct net_device_ops documentation .ndo_start_xmit is "Required; cannot be NULL.". Missing it may crash kernel easily: [ 341.216709] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 341.224836] pgd = 26088755 [ 341.227544] [00000000] *pgd=00000000 [ 341.231135] Internal error: Oops: 80000007 [#1] SMP ARM [ 341.236367] Modules linked in: pppoe ppp_async iptable_nat brcmfmac xt_state xt_nat xt_conntrack xt_REDIRECT xt_MASQU [ 341.304689] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.24 #0 [ 341.310621] Hardware name: BCM5301X [ 341.314116] PC is at 0x0 [ 341.316664] LR is at dev_hard_start_xmit+0x8c/0x11c [ 341.321546] pc : [<00000000>] lr : [] psr: 60000113 [ 341.327821] sp : c0801c30 ip : c610cf00 fp : c08048e4 [ 341.333051] r10: c073a63a r9 : c08044dc r8 : c6c04e00 [ 341.338283] r7 : 00000000 r6 : c60f5000 r5 : 00000000 r4 : c6a9c3c0 [ 341.344820] r3 : 00000000 r2 : bf25a13c r1 : c60f5000 r0 : c6a9c3c0 [ 341.351358] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 341.358504] Control: 10c5387d Table: 0611c04a DAC: 00000051 [ 341.364257] Process swapper/0 (pid: 0, stack limit = 0xc68ed0ca) [ 341.370271] Stack: (0xc0801c30 to 0xc0802000) [ 341.374633] 1c20: c6e7d480 c0802d00 c60f5050 c0801c6c [ 341.382825] 1c40: c60f5000 c6a9c3c0 c6f90000 c6f9005c c6c04e00 c60f5000 00000000 c6f9005c [ 341.391015] 1c60: 00000000 c04a033c 00f90200 00000010 c6a9c3c0 c6a9c3c0 c6f90000 00000000 [ 341.399205] 1c80: 00000000 00000000 00000000 c046a7ac c6f9005c 00000001 fffffff4 00000000 [ 341.407395] 1ca0: c6f90200 00000000 c60f5000 c0479550 00000000 c6f90200 c6a9c3c0 16000000 [ 341.415586] 1cc0: 0000001c 6f4ad52f c6197040 b6df9387 36000000 c0520404 c073a80c c6a9c3c0 [ 341.423777] 1ce0: 00000000 c6d643c0 c6a9c3c0 c0800024 00000001 00000001 c6d643c8 c6a9c3c0 [ 341.431967] 1d00: c081b9c0 c7abca80 c610c840 c081b9c0 0000001c 00400000 c6bc5e6c c0522fb4 [ 341.440157] 1d20: c6d64400 00000004 c6bc5e0a 00000000 c60f5000 c7abca80 c081b9c0 c0522f54 [ 341.448348] 1d40: c6a9c3c0 c7abca80 c0803e48 c0549c94 c610c828 0000000a c0801d74 00000003 [ 341.456538] 1d60: c6ec8f0a 00000000 c60f5000 c7abca80 c081b9c0 c0548520 0000000a 00000000 [ 341.464728] 1d80: 00000000 003a0000 00000000 00000000 00000000 00000000 00000000 00000000 [ 341.472919] 1da0: 000002ff 00000000 00000000 16000000 00000000 00000000 00000000 00000000 [ 341.481110] 1dc0: 00000000 0000008f 00000000 00000000 00000000 2d132a69 c6bc5e40 00000000 [ 341.489300] 1de0: c6bc5e40 c6a9c3c0 00000000 c6ec8e50 00000001 c054b070 00000001 00000000 [ 341.497490] 1e00: c0807200 c6bc5e00 00000000 ffffe000 00000100 c054aea4 00000000 00000000 [ 341.505681] 1e20: 00000122 00400000 c0802d00 c0172e80 6f56a70e ffffffff 6f56a70e c7eb9cc0 [ 341.513871] 1e40: c7eb82c0 00000000 c0801e60 c017309c 00000000 00000000 07780000 c07382c0 [ 341.522061] 1e60: 00000000 c7eb9cc0 c0739cc0 c0803f74 c0801e70 c0801e70 c0801ea4 c013d380 [ 341.530253] 1e80: 00000000 000000a0 00000001 c0802084 c0802080 40000001 ffffe000 00000100 [ 341.538443] 1ea0: c0802080 c01021e8 c8803100 10c5387d 00000000 c07341f0 c0739880 0000000a [ 341.546633] 1ec0: c0734180 00001017 c0802d00 c062aa98 00200002 c062aa60 c8803100 c073984c [ 341.554823] 1ee0: 00000000 00000001 00000000 c7810000 c8803100 10c5387d 00000000 c011c188 [ 341.563014] 1f00: c073984c c015f0f8 c0804244 c0815ae4 c880210c c8802100 c0801f40 c037c584 [ 341.571204] 1f20: c01035f8 60000013 ffffffff c0801f74 c080afd4 c0800000 10c5387d c0101a8c [ 341.579395] 1f40: 00000000 004ac9dc c7eba4b4 c010ee60 ffffe000 c0803e68 c0803ea8 00000001 [ 341.587587] 1f60: c080afd4 c062ca20 10c5387d 00000000 00000000 c0801f90 c01035f4 c01035f8 [ 341.595776] 1f80: 60000013 ffffffff 00000051 00000000 ffffe000 c013ff50 000000ce c0803e40 [ 341.603967] 1fa0: c082216c 00000000 00000001 c072ba38 10c5387d c0140214 c0822184 c0700df8 [ 341.612157] 1fc0: ffffffff ffffffff 00000000 c070058c c072ba38 2d162e71 00000000 c0700330 [ 341.620348] 1fe0: 00000051 10c0387d 000000ff 00a521d0 413fc090 00000000 00000000 00000000 [ 341.628558] [] (dev_hard_start_xmit) from [] (sch_direct_xmit+0xe4/0x2bc) [ 341.637106] [] (sch_direct_xmit) from [] (__dev_queue_xmit+0x6a4/0x72c) [ 341.645481] [] (__dev_queue_xmit) from [] (ip6_finish_output2+0x18c/0x434) [ 341.654112] [] (ip6_finish_output2) from [] (ip6_output+0x5c/0xd0) [ 341.662053] [] (ip6_output) from [] (mld_sendpack+0x1a0/0x1a8) [ 341.669640] [] (mld_sendpack) from [] (mld_ifc_timer_expire+0x1cc/0x2e4) [ 341.678111] [] (mld_ifc_timer_expire) from [] (call_timer_fn.constprop.3+0x24/0x98) [ 341.687527] [] (call_timer_fn.constprop.3) from [] (run_timer_softirq+0x1a8/0x1e4) [ 341.696860] [] (run_timer_softirq) from [] (__do_softirq+0x120/0x2b0) [ 341.705066] [] (__do_softirq) from [] (irq_exit+0x78/0x84) [ 341.712317] [] (irq_exit) from [] (__handle_domain_irq+0x60/0xb4) [ 341.720179] [] (__handle_domain_irq) from [] (gic_handle_irq+0x4c/0x90) [ 341.728549] [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x90) Fixes: 20f2c5fa3af0 ("brcmfmac: add initial support for monitor mode") Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200327130307.26477-1-zajec5@gmail.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 23627c953a5e..436f501be937 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -729,9 +729,18 @@ static int brcmf_net_mon_stop(struct net_device *ndev) return err; } +static netdev_tx_t brcmf_net_mon_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} + static const struct net_device_ops brcmf_netdev_ops_mon = { .ndo_open = brcmf_net_mon_open, .ndo_stop = brcmf_net_mon_stop, + .ndo_start_xmit = brcmf_net_mon_start_xmit, }; int brcmf_net_mon_attach(struct brcmf_if *ifp) -- cgit From c9be1a642a7b9ec021e3f32e084dc781b3e5216d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 3 Apr 2020 16:34:14 +0800 Subject: ath11k: fix compiler warnings without CONFIG_THERMAL drivers/net/wireless/ath/ath11k/thermal.h:45:1: warning: no return statement in function returning non-void [-Wreturn-type] drivers/net/wireless/ath/ath11k/core.c:416:28: error: passing argument 1 of 'ath11k_thermal_unregister' from incompatible pointer type [-Werror=incompatible-pointer-types] Add missing return 0 in ath11k_thermal_set_throttling, and fix ath11k_thermal_unregister param type. Fixes: 2a63bbca06b2 ("ath11k: add thermal cooling device support") Signed-off-by: YueHaibing Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200403083414.31392-1-yuehaibing@huawei.com --- drivers/net/wireless/ath/ath11k/thermal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/thermal.h b/drivers/net/wireless/ath/ath11k/thermal.h index 459b8d49c184..f9af55f3682d 100644 --- a/drivers/net/wireless/ath/ath11k/thermal.h +++ b/drivers/net/wireless/ath/ath11k/thermal.h @@ -36,12 +36,13 @@ static inline int ath11k_thermal_register(struct ath11k_base *sc) return 0; } -static inline void ath11k_thermal_unregister(struct ath11k *ar) +static inline void ath11k_thermal_unregister(struct ath11k_base *sc) { } static inline int ath11k_thermal_set_throttling(struct ath11k *ar, u32 throttle_state) { + return 0; } static inline void ath11k_thermal_event_temperature(struct ath11k *ar, -- cgit From db5c97f02373917efe2c218ebf8e3d8b19e343b6 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 2 Apr 2020 15:52:10 +0800 Subject: xsk: Fix out of boundary write in __xsk_rcv_memcpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit first_len is the remainder of the first page we're copying. If this size is larger, then out of page boundary write will otherwise happen. Fixes: c05cd3645814 ("xsk: add support to allow unaligned chunk placement") Signed-off-by: Li RongQing Signed-off-by: Daniel Borkmann Acked-by: Jonathan Lemon Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1585813930-19712-1-git-send-email-lirongqing@baidu.com --- net/xdp/xsk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 356f90e4522b..c350108aa38d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -131,8 +131,9 @@ static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf, u64 page_start = addr & ~(PAGE_SIZE - 1); u64 first_len = PAGE_SIZE - (addr - page_start); - memcpy(to_buf, from_buf, first_len + metalen); - memcpy(next_pg_addr, from_buf + first_len, len - first_len); + memcpy(to_buf, from_buf, first_len); + memcpy(next_pg_addr, from_buf + first_len, + len + metalen - first_len); return; } -- cgit From 4734b0fefbbf98f8c119eb8344efa19dac82cd2c Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Sat, 4 Apr 2020 01:14:30 -0400 Subject: libbpf: Initialize *nl_pid so gcc 10 is happy Builds of Fedora's kernel-tools package started to fail with "may be used uninitialized" warnings for nl_pid in bpf_set_link_xdp_fd() and bpf_get_link_xdp_info() on the s390 architecture. Although libbpf_netlink_open() always returns a negative number when it does not set *nl_pid, the compiler does not determine this and thus believes the variable might be used uninitialized. Assuage gcc's fears by explicitly initializing nl_pid. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1807781 Signed-off-by: Jeremy Cline Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200404051430.698058-1-jcline@redhat.com --- tools/lib/bpf/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 18b5319025e1..9a14694176de 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -142,7 +142,7 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, struct ifinfomsg ifinfo; char attrbuf[64]; } req; - __u32 nl_pid; + __u32 nl_pid = 0; sock = libbpf_netlink_open(&nl_pid); if (sock < 0) @@ -288,7 +288,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, { struct xdp_id_md xdp_id = {}; int sock, ret; - __u32 nl_pid; + __u32 nl_pid = 0; __u32 mask; if (flags & ~XDP_FLAGS_MASK || !info_size) -- cgit From 0ac16296ffc638f5163f9aeeeb1fe447268e449f Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 3 Apr 2020 16:07:34 +0800 Subject: bpf: Fix a typo "inacitve" -> "inactive" There is a typo in struct bpf_lru_list's next_inactive_rotation description, thus fix s/inacitve/inactive/. Signed-off-by: Qiujun Huang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1585901254-30377-1-git-send-email-hqjagain@gmail.com --- kernel/bpf/bpf_lru_list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index f02504640e18..6b12f06ee18c 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -30,7 +30,7 @@ struct bpf_lru_node { struct bpf_lru_list { struct list_head lists[NR_BPF_LRU_LIST_T]; unsigned int counts[NR_BPF_LRU_LIST_COUNT]; - /* The next inacitve list rotation starts from here */ + /* The next inactive list rotation starts from here */ struct list_head *next_inactive_rotation; raw_spinlock_t lock ____cacheline_aligned_in_smp; -- cgit From d9583cdf2f38d0f526d9a8c8564dd2e35e649bc7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 7 Apr 2020 14:10:11 +0200 Subject: netfilter: nf_tables: report EOPNOTSUPP on unsupported flags/object type EINVAL should be used for malformed netlink messages. New userspace utility and old kernels might easily result in EINVAL when exercising new set features, which is misleading. Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f91e96d8de05..21cbde6ecee3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3963,7 +3963,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) - return -EINVAL; + return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == (NFT_SET_MAP | NFT_SET_OBJECT)) @@ -4001,7 +4001,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); if (objtype == NFT_OBJECT_UNSPEC || objtype > NFT_OBJECT_MAX) - return -EINVAL; + return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else -- cgit From ef516e8625ddea90b3a0313f3a0b0baa83db7ac2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 7 Apr 2020 14:10:38 +0200 Subject: netfilter: nf_tables: reintroduce the NFT_SET_CONCAT flag Stefano originally proposed to introduce this flag, users hit EOPNOTSUPP in new binaries with old kernels when defining a set with ranges in a concatenation. Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 30f2a87270dc..4565456c0ef4 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -276,6 +276,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_TIMEOUT: set uses timeouts * @NFT_SET_EVAL: set can be updated from the evaluation path * @NFT_SET_OBJECT: set contains stateful objects + * @NFT_SET_CONCAT: set contains a concatenation */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -285,6 +286,7 @@ enum nft_set_flags { NFT_SET_TIMEOUT = 0x10, NFT_SET_EVAL = 0x20, NFT_SET_OBJECT = 0x40, + NFT_SET_CONCAT = 0x80, }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 21cbde6ecee3..9adfbc7e8ae7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3962,7 +3962,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | - NFT_SET_OBJECT)) + NFT_SET_OBJECT | NFT_SET_CONCAT)) return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == -- cgit From 489553dd13a88d8a882db10622ba8b9b58582ce4 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Mon, 6 Apr 2020 22:16:04 +0000 Subject: riscv, bpf: Fix offset range checking for auipc+jalr on RV64 The existing code in emit_call on RV64 checks that the PC-relative offset to the function fits in 32 bits before calling emit_jump_and_link to emit an auipc+jalr pair. However, this check is incorrect because offsets in the range [2^31 - 2^11, 2^31 - 1] cannot be encoded using auipc+jalr on RV64 (see discussion [1]). The RISC-V spec has recently been updated to reflect this fact [2, 3]. This patch fixes the problem by moving the check on the offset into emit_jump_and_link and modifying it to the correct range of encodable offsets, which is [-2^31 - 2^11, 2^31 - 2^11). This also enforces the check on the offset to other uses of emit_jump_and_link (e.g., BPF_JA) as well. Currently, this bug is unlikely to be triggered, because the memory region from which JITed images are allocated is close enough to kernel text for the offsets to not become too large; and because the bounds on BPF program size are small enough. This patch prevents this problem from becoming an issue if either of these change. [1]: https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/bwWFhBnnZFQ [2]: https://github.com/riscv/riscv-isa-manual/commit/b1e42e09ac55116dbf9de5e4fb326a5a90e4a993 [3]: https://github.com/riscv/riscv-isa-manual/commit/4c1b2066ebd2965a422e41eb262d0a208a7fea07 Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200406221604.18547-1-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit_comp64.c | 49 +++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index cc1985d8750a..d208a9fd6c52 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -110,6 +110,16 @@ static bool is_32b_int(s64 val) return -(1L << 31) <= val && val < (1L << 31); } +static bool in_auipc_jalr_range(s64 val) +{ + /* + * auipc+jalr can reach any signed PC-relative offset in the range + * [-2^31 - 2^11, 2^31 - 2^11). + */ + return (-(1L << 31) - (1L << 11)) <= val && + val < ((1L << 31) - (1L << 11)); +} + static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) { /* Note that the immediate from the add is sign-extended, @@ -380,20 +390,24 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) *rd = RV_REG_T2; } -static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr, - struct rv_jit_context *ctx) +static int emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr, + struct rv_jit_context *ctx) { s64 upper, lower; if (rvoff && is_21b_int(rvoff) && !force_jalr) { emit(rv_jal(rd, rvoff >> 1), ctx); - return; + return 0; + } else if (in_auipc_jalr_range(rvoff)) { + upper = (rvoff + (1 << 11)) >> 12; + lower = rvoff & 0xfff; + emit(rv_auipc(RV_REG_T1, upper), ctx); + emit(rv_jalr(rd, RV_REG_T1, lower), ctx); + return 0; } - upper = (rvoff + (1 << 11)) >> 12; - lower = rvoff & 0xfff; - emit(rv_auipc(RV_REG_T1, upper), ctx); - emit(rv_jalr(rd, RV_REG_T1, lower), ctx); + pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff); + return -ERANGE; } static bool is_signed_bpf_cond(u8 cond) @@ -407,18 +421,16 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) s64 off = 0; u64 ip; u8 rd; + int ret; if (addr && ctx->insns) { ip = (u64)(long)(ctx->insns + ctx->ninsns); off = addr - ip; - if (!is_32b_int(off)) { - pr_err("bpf-jit: target call addr %pK is out of range\n", - (void *)addr); - return -ERANGE; - } } - emit_jump_and_link(RV_REG_RA, off, !fixed, ctx); + ret = emit_jump_and_link(RV_REG_RA, off, !fixed, ctx); + if (ret) + return ret; rd = bpf_to_rv_reg(BPF_REG_0, ctx); emit(rv_addi(rd, RV_REG_A0, 0), ctx); return 0; @@ -429,7 +441,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; - int s, e, rvoff, i = insn - ctx->prog->insnsi; + int s, e, rvoff, ret, i = insn - ctx->prog->insnsi; struct bpf_prog_aux *aux = ctx->prog->aux; u8 rd = -1, rs = -1, code = insn->code; s16 off = insn->off; @@ -699,7 +711,9 @@ out_be: /* JUMP off */ case BPF_JMP | BPF_JA: rvoff = rv_offset(i, off, ctx); - emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + if (ret) + return ret; break; /* IF (dst COND src) JUMP off */ @@ -801,7 +815,6 @@ out_be: case BPF_JMP | BPF_CALL: { bool fixed; - int ret; u64 addr; mark_call(ctx); @@ -826,7 +839,9 @@ out_be: break; rvoff = epilogue_offset(ctx); - emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + if (ret) + return ret; break; /* dst = imm64 */ -- cgit From f07cbad29741407ace2a9688548fa93d9cb38df3 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 6 Apr 2020 22:09:45 -0700 Subject: libbpf: Fix bpf_get_link_xdp_id flags handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently if one of XDP_FLAGS_{DRV,HW,SKB}_MODE flags is passed to bpf_get_link_xdp_id() and there is a single XDP program attached to ifindex, that program's id will be returned by bpf_get_link_xdp_id() in prog_id argument no matter what mode the program is attached in, i.e. flags argument is not taken into account. For example, if there is a single program attached with XDP_FLAGS_SKB_MODE but user calls bpf_get_link_xdp_id() with flags = XDP_FLAGS_DRV_MODE, that skb program will be returned. Fix it by returning info->prog_id only if user didn't specify flags. If flags is specified then return corresponding mode-specific-field from struct xdp_link_info. The initial error was introduced in commit 50db9f073188 ("libbpf: Add a support for getting xdp prog id on ifindex") and then refactored in 473f4e133a12 so 473f4e133a12 is used in the Fixes tag. Fixes: 473f4e133a12 ("libbpf: Add bpf_get_link_xdp_info() function to get more XDP information") Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/0e9e30490b44b447bb2bebc69c7135e7fe7e4e40.1586236080.git.rdna@fb.com --- tools/lib/bpf/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 9a14694176de..0b709fd10bba 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -321,7 +321,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) { - if (info->attach_mode != XDP_ATTACHED_MULTI) + if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) return info->prog_id; if (flags & XDP_FLAGS_DRV_MODE) return info->drv_prog_id; -- cgit From eb203f4b89c1a1a779d9781e49b568d2a712abc6 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 6 Apr 2020 22:09:46 -0700 Subject: selftests/bpf: Add test for bpf_get_link_xdp_id Add xdp_info selftest that makes sure that bpf_get_link_xdp_id returns valid prog_id for different input modes: * w/ and w/o flags when no program is attached; * w/ and w/o flags when one program is attached. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/2a9a6d1ce33b91ccc1aa3de6dba2d309f2062811.1586236080.git.rdna@fb.com --- tools/testing/selftests/bpf/prog_tests/xdp_info.c | 68 +++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_info.c diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c new file mode 100644 index 000000000000..d2d7a283d72f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#define IFINDEX_LO 1 + +void test_xdp_info(void) +{ + __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id; + const char *file = "./xdp_dummy.o"; + struct bpf_prog_info info = {}; + struct bpf_object *obj; + int err, prog_fd; + + /* Get prog_id for XDP_ATTACHED_NONE mode */ + + err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0); + if (CHECK(err, "get_xdp_none", "errno=%d\n", errno)) + return; + if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id)) + return; + + err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE); + if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno)) + return; + if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n", + prog_id)) + return; + + /* Setup prog */ + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); + if (CHECK(err, "get_prog_info", "errno=%d\n", errno)) + goto out_close; + + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); + if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno)) + goto out_close; + + /* Get prog_id for single prog mode */ + + err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0); + if (CHECK(err, "get_xdp", "errno=%d\n", errno)) + goto out; + if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n")) + goto out; + + err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE); + if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno)) + goto out; + if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n")) + goto out; + + err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE); + if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno)) + goto out; + if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id)) + goto out; + +out: + bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); +out_close: + bpf_object__close(obj); +} -- cgit From 045065f06f938d3171b3ffacb34453421a32c1e3 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Tue, 7 Apr 2020 22:55:25 +0000 Subject: net: sock.h: fix skb_steal_sock() kernel-doc Fix warnings related to kernel-doc notation, and wording in function description. Signed-off-by: Lothar Rubusch Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 6d84784d33fa..3e8c6d4b4b59 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2553,9 +2553,9 @@ sk_is_refcounted(struct sock *sk) } /** - * skb_steal_sock - * @skb to steal the socket from - * @refcounted is set to true if the socket is reference-counted + * skb_steal_sock - steal a socket from an sk_buff + * @skb: sk_buff to steal the socket from + * @refcounted: is set to true if the socket is reference-counted */ static inline struct sock * skb_steal_sock(struct sk_buff *skb, bool *refcounted) -- cgit From da722186f6549d752ea5b5fbc18111833c81a133 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Thu, 2 Apr 2020 15:51:27 +0200 Subject: net: fec: set GPR bit on suspend by DT configuration. On some SoCs, such as the i.MX6, it is necessary to set a bit in the SoC level GPR register before suspending for wake on lan to work. The fec platform callback sleep_mode_enable was intended to allow this but the platform implementation was NAK'd back in 2015 [1] This means that, currently, wake on lan is broken on mainline for the i.MX6 at least. So implement the required bit setting in the fec driver by itself by adding a new optional DT property indicating the GPR register and adding the offset and bit information to the driver. [1] https://www.spinics.net/lists/netdev/msg310922.html Signed-off-by: Martin Fuzzey Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.h | 7 ++ drivers/net/ethernet/freescale/fec_main.c | 149 ++++++++++++++++++++++++------ 2 files changed, 127 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index bd898f5b4da5..e74dd1f86bba 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -488,6 +488,12 @@ struct fec_enet_priv_rx_q { struct sk_buff *rx_skbuff[RX_RING_SIZE]; }; +struct fec_stop_mode_gpr { + struct regmap *gpr; + u8 reg; + u8 bit; +}; + /* The FEC buffer descriptors track the ring buffers. The rx_bd_base and * tx_bd_base always point to the base of the buffer descriptors. The * cur_rx and cur_tx point to the currently available buffer. @@ -562,6 +568,7 @@ struct fec_enet_private { int hwts_tx_en; struct delayed_work time_keep; struct regulator *reg_phy; + struct fec_stop_mode_gpr stop_gpr; unsigned int tx_align; unsigned int rx_align; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c1c267b61647..dc6f8763a5d4 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -62,6 +62,8 @@ #include #include #include +#include +#include #include #include @@ -84,6 +86,56 @@ static void fec_enet_itr_coal_init(struct net_device *ndev); #define FEC_ENET_OPD_V 0xFFF0 #define FEC_MDIO_PM_TIMEOUT 100 /* ms */ +struct fec_devinfo { + u32 quirks; + u8 stop_gpr_reg; + u8 stop_gpr_bit; +}; + +static const struct fec_devinfo fec_imx25_info = { + .quirks = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | + FEC_QUIRK_HAS_FRREG, +}; + +static const struct fec_devinfo fec_imx27_info = { + .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, +}; + +static const struct fec_devinfo fec_imx28_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_FRREG, +}; + +static const struct fec_devinfo fec_imx6q_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | + FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | + FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 | + FEC_QUIRK_HAS_RACC, + .stop_gpr_reg = 0x34, + .stop_gpr_bit = 27, +}; + +static const struct fec_devinfo fec_mvf600_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC, +}; + +static const struct fec_devinfo fec_imx6x_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | + FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | + FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | + FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE, +}; + +static const struct fec_devinfo fec_imx6ul_info = { + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | + FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | + FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR007885 | + FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_COALESCE, +}; + static struct platform_device_id fec_devtype[] = { { /* keep it for coldfire */ @@ -91,39 +143,25 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | - FEC_QUIRK_HAS_FRREG, + .driver_data = (kernel_ulong_t)&fec_imx25_info, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, + .driver_data = (kernel_ulong_t)&fec_imx27_info, }, { .name = "imx28-fec", - .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | - FEC_QUIRK_HAS_FRREG, + .driver_data = (kernel_ulong_t)&fec_imx28_info, }, { .name = "imx6q-fec", - .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | - FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | - FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 | - FEC_QUIRK_HAS_RACC, + .driver_data = (kernel_ulong_t)&fec_imx6q_info, }, { .name = "mvf600-fec", - .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC, + .driver_data = (kernel_ulong_t)&fec_mvf600_info, }, { .name = "imx6sx-fec", - .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | - FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | - FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | - FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | - FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE, + .driver_data = (kernel_ulong_t)&fec_imx6x_info, }, { .name = "imx6ul-fec", - .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | - FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | - FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR007885 | - FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | - FEC_QUIRK_HAS_COALESCE, + .driver_data = (kernel_ulong_t)&fec_imx6ul_info, }, { /* sentinel */ } @@ -1092,11 +1130,28 @@ fec_restart(struct net_device *ndev) } +static void fec_enet_stop_mode(struct fec_enet_private *fep, bool enabled) +{ + struct fec_platform_data *pdata = fep->pdev->dev.platform_data; + struct fec_stop_mode_gpr *stop_gpr = &fep->stop_gpr; + + if (stop_gpr->gpr) { + if (enabled) + regmap_update_bits(stop_gpr->gpr, stop_gpr->reg, + BIT(stop_gpr->bit), + BIT(stop_gpr->bit)); + else + regmap_update_bits(stop_gpr->gpr, stop_gpr->reg, + BIT(stop_gpr->bit), 0); + } else if (pdata && pdata->sleep_mode_enable) { + pdata->sleep_mode_enable(enabled); + } +} + static void fec_stop(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_platform_data *pdata = fep->pdev->dev.platform_data; u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8); u32 val; @@ -1125,9 +1180,7 @@ fec_stop(struct net_device *ndev) val = readl(fep->hwp + FEC_ECNTRL); val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); writel(val, fep->hwp + FEC_ECNTRL); - - if (pdata && pdata->sleep_mode_enable) - pdata->sleep_mode_enable(true); + fec_enet_stop_mode(fep, true); } writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); @@ -3398,6 +3451,37 @@ static int fec_enet_get_irq_cnt(struct platform_device *pdev) return irq_cnt; } +static int fec_enet_init_stop_mode(struct fec_enet_private *fep, + struct fec_devinfo *dev_info, + struct device_node *np) +{ + struct device_node *gpr_np; + int ret = 0; + + if (!dev_info) + return 0; + + gpr_np = of_parse_phandle(np, "gpr", 0); + if (!gpr_np) + return 0; + + fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np); + if (IS_ERR(fep->stop_gpr.gpr)) { + dev_err(&fep->pdev->dev, "could not find gpr regmap\n"); + ret = PTR_ERR(fep->stop_gpr.gpr); + fep->stop_gpr.gpr = NULL; + goto out; + } + + fep->stop_gpr.reg = dev_info->stop_gpr_reg; + fep->stop_gpr.bit = dev_info->stop_gpr_bit; + +out: + of_node_put(gpr_np); + + return ret; +} + static int fec_probe(struct platform_device *pdev) { @@ -3413,6 +3497,7 @@ fec_probe(struct platform_device *pdev) int num_rx_qs; char irq_name[8]; int irq_cnt; + struct fec_devinfo *dev_info; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -3430,7 +3515,9 @@ fec_probe(struct platform_device *pdev) of_id = of_match_device(fec_dt_ids, &pdev->dev); if (of_id) pdev->id_entry = of_id->data; - fep->quirks = pdev->id_entry->driver_data; + dev_info = (struct fec_devinfo *)pdev->id_entry->driver_data; + if (dev_info) + fep->quirks = dev_info->quirks; fep->netdev = ndev; fep->num_rx_queues = num_rx_qs; @@ -3464,6 +3551,10 @@ fec_probe(struct platform_device *pdev) if (of_get_property(np, "fsl,magic-packet", NULL)) fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET; + ret = fec_enet_init_stop_mode(fep, dev_info, np); + if (ret) + goto failed_stop_mode; + phy_node = of_parse_phandle(np, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(np)) { ret = of_phy_register_fixed_link(np); @@ -3632,6 +3723,7 @@ failed_clk: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(phy_node); +failed_stop_mode: failed_phy: dev_id--; failed_ioremap: @@ -3709,7 +3801,6 @@ static int __maybe_unused fec_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_platform_data *pdata = fep->pdev->dev.platform_data; int ret; int val; @@ -3727,8 +3818,8 @@ static int __maybe_unused fec_resume(struct device *dev) goto failed_clk; } if (fep->wol_flag & FEC_WOL_FLAG_ENABLE) { - if (pdata && pdata->sleep_mode_enable) - pdata->sleep_mode_enable(false); + fec_enet_stop_mode(fep, false); + val = readl(fep->hwp + FEC_ECNTRL); val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP); writel(val, fep->hwp + FEC_ECNTRL); -- cgit From 4141f1a40fc0789f6fd4330e171e1edf155426aa Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Thu, 2 Apr 2020 15:51:28 +0200 Subject: ARM: dts: imx6: Use gpc for FEC interrupt controller to fix wake on LAN. In order to wake from suspend by ethernet magic packets the GPC must be used as intc does not have wakeup functionality. But the FEC DT node currently uses interrupt-extended, specificying intc, thus breaking WoL. This problem is probably fallout from the stacked domain conversion as intc used to chain to GPC. So replace "interrupts-extended" by "interrupts" to use the default parent which is GPC. Fixes: b923ff6af0d5 ("ARM: imx6: convert GPC to stacked domains") Signed-off-by: Martin Fuzzey Signed-off-by: David S. Miller --- arch/arm/boot/dts/imx6qdl.dtsi | 5 ++--- arch/arm/boot/dts/imx6qp.dtsi | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 47982889d774..0da4cc29effd 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -1039,9 +1039,8 @@ compatible = "fsl,imx6q-fec"; reg = <0x02188000 0x4000>; interrupt-names = "int0", "pps"; - interrupts-extended = - <&intc 0 118 IRQ_TYPE_LEVEL_HIGH>, - <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, + <0 119 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET_REF>; diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 93b89dc1f53b..b310f13a53f2 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -77,7 +77,6 @@ }; &fec { - /delete-property/interrupts-extended; interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, <0 119 IRQ_TYPE_LEVEL_HIGH>; }; -- cgit From 70f268588a8c4e7596d9031afcbf2e78cf9c757d Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Thu, 2 Apr 2020 15:51:29 +0200 Subject: dt-bindings: fec: document the new gpr property. This property allows the gpr register bit to be defined for wake on lan support. Signed-off-by: Martin Fuzzey Reviewed-by: Fugang Duan Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/fsl-fec.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index 5b88fae0307d..ff8b0f211aa1 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -22,6 +22,8 @@ Optional properties: - fsl,err006687-workaround-present: If present indicates that the system has the hardware workaround for ERR006687 applied and does not need a software workaround. +- gpr: phandle of SoC general purpose register mode. Required for wake on LAN + on some SoCs -interrupt-names: names of the interrupts listed in interrupts property in the same order. The defaults if not specified are __Number of interrupts__ __Default__ -- cgit From be8ae92f5c25f0896969bbc049e9844f9dcd53f1 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Thu, 2 Apr 2020 15:51:30 +0200 Subject: ARM: dts: imx6: add fec gpr property. This is required for wake on lan on i.MX6 Signed-off-by: Martin Fuzzey Reviewed-by: Fugang Duan Signed-off-by: David S. Miller --- arch/arm/boot/dts/imx6qdl.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 0da4cc29effd..98da446aa0f2 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -1045,6 +1045,7 @@ <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET_REF>; clock-names = "ipg", "ahb", "ptp"; + gpr = <&gpr>; status = "disabled"; }; -- cgit From b93cfb9cd3af3adc9ba4854f178d5300f7544d3e Mon Sep 17 00:00:00 2001 From: Tim Stallard Date: Fri, 3 Apr 2020 21:22:57 +0100 Subject: net: icmp6: do not select saddr from iif when route has prefsrc set Since commit fac6fce9bdb5 ("net: icmp6: provide input address for traceroute6") ICMPv6 errors have source addresses from the ingress interface. However, this overrides when source address selection is influenced by setting preferred source addresses on routes. This can result in ICMP errors being lost to upstream BCP38 filters when the wrong source addresses are used, breaking path MTU discovery and traceroute. This patch sets the modified source address selection to only take place when the route used has no prefsrc set. It can be tested with: ip link add v1 type veth peer name v2 ip netns add test ip netns exec test ip link set lo up ip link set v2 netns test ip link set v1 up ip netns exec test ip link set v2 up ip addr add 2001:db8::1/64 dev v1 nodad ip addr add 2001:db8::3 dev v1 nodad ip netns exec test ip addr add 2001:db8::2/64 dev v2 nodad ip netns exec test ip route add unreachable 2001:db8:1::1 ip netns exec test ip addr add 2001:db8:100::1 dev lo ip netns exec test ip route add 2001:db8::1 dev v2 src 2001:db8:100::1 ip route add 2001:db8:1000::1 via 2001:db8::2 traceroute6 -s 2001:db8::1 2001:db8:1000::1 traceroute6 -s 2001:db8::3 2001:db8:1000::1 ip netns delete test Output before: $ traceroute6 -s 2001:db8::1 2001:db8:1000::1 traceroute to 2001:db8:1000::1 (2001:db8:1000::1), 30 hops max, 80 byte packets 1 2001:db8::2 (2001:db8::2) 0.843 ms !N 0.396 ms !N 0.257 ms !N $ traceroute6 -s 2001:db8::3 2001:db8:1000::1 traceroute to 2001:db8:1000::1 (2001:db8:1000::1), 30 hops max, 80 byte packets 1 2001:db8::2 (2001:db8::2) 0.772 ms !N 0.257 ms !N 0.357 ms !N After: $ traceroute6 -s 2001:db8::1 2001:db8:1000::1 traceroute to 2001:db8:1000::1 (2001:db8:1000::1), 30 hops max, 80 byte packets 1 2001:db8:100::1 (2001:db8:100::1) 8.885 ms !N 0.310 ms !N 0.174 ms !N $ traceroute6 -s 2001:db8::3 2001:db8:1000::1 traceroute to 2001:db8:1000::1 (2001:db8:1000::1), 30 hops max, 80 byte packets 1 2001:db8::2 (2001:db8::2) 1.403 ms !N 0.205 ms !N 0.313 ms !N Fixes: fac6fce9bdb5 ("net: icmp6: provide input address for traceroute6") Signed-off-by: Tim Stallard Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2688f3e82165..fc5000370030 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -229,6 +229,25 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, return res; } +static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, + struct flowi6 *fl6) +{ + struct net *net = sock_net(sk); + struct dst_entry *dst; + bool res = false; + + dst = ip6_route_output(net, sk, fl6); + if (!dst->error) { + struct rt6_info *rt = (struct rt6_info *)dst; + struct in6_addr prefsrc; + + rt6_get_prefsrc(rt, &prefsrc); + res = !ipv6_addr_any(&prefsrc); + } + dst_release(dst); + return res; +} + /* * an inline helper for the "simple" if statement below * checks if parameter problem report is caused by an @@ -527,7 +546,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, saddr = force_saddr; if (saddr) { fl6.saddr = *saddr; - } else { + } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) { /* select a more meaningful saddr from input if */ struct net_device *in_netdev; -- cgit From 03e2a984b6165621f287fadf5f4b5cd8b58dcaba Mon Sep 17 00:00:00 2001 From: Tim Stallard Date: Fri, 3 Apr 2020 21:26:21 +0100 Subject: net: ipv6: do not consider routes via gateways for anycast address check The behaviour for what is considered an anycast address changed in commit 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception"). This now considers the first address in a subnet where there is a route via a gateway to be an anycast address. This breaks path MTU discovery and traceroutes when a host in a remote network uses the address at the start of a prefix (eg 2600:: advertised as 2600::/48 in the DFZ) as ICMP errors will not be sent to anycast addresses. This patch excludes any routes with a gateway, or via point to point links, like the behaviour previously from rt6_is_gw_or_nonexthop in net/ipv6/route.c. This can be tested with: ip link add v1 type veth peer name v2 ip netns add test ip netns exec test ip link set lo up ip link set v2 netns test ip link set v1 up ip netns exec test ip link set v2 up ip addr add 2001:db8::1/64 dev v1 nodad ip addr add 2001:db8:100:: dev lo nodad ip netns exec test ip addr add 2001:db8::2/64 dev v2 nodad ip netns exec test ip route add unreachable 2001:db8:1::1 ip netns exec test ip route add 2001:db8:100::/64 via 2001:db8::1 ip netns exec test sysctl net.ipv6.conf.all.forwarding=1 ip route add 2001:db8:1::1 via 2001:db8::2 ping -I 2001:db8::1 2001:db8:1::1 -c1 ping -I 2001:db8:100:: 2001:db8:1::1 -c1 ip addr delete 2001:db8:100:: dev lo ip netns delete test Currently the first ping will get back a destination unreachable ICMP error, but the second will never get a response, with "icmp6_send: acast source" logged. After this patch, both get destination unreachable ICMP replies. Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception") Signed-off-by: Tim Stallard Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f7543c095b33..9947eb1e9eb6 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -254,6 +254,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen < 127 && + !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } -- cgit From 84d2f7b708c374a15a2abe092a74e0e47d018286 Mon Sep 17 00:00:00 2001 From: René van Dorst Date: Mon, 6 Apr 2020 05:42:53 +0800 Subject: net: dsa: mt7530: move mt7623 settings out off the mt7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving mt7623 logic out off mt7530, is required to make hardware setting consistent after we introduce phylink to mtk driver. Fixes: ca366d6c889b ("net: dsa: mt7530: Convert to PHYLINK API") Reviewed-by: Sean Wang Tested-by: Sean Wang Signed-off-by: René van Dorst Tested-by: Frank Wunderlich Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 85 ------------------------------------------------ drivers/net/dsa/mt7530.h | 10 ------ 2 files changed, 95 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 2d0d91db0ddb..84391c8a0e16 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -66,58 +66,6 @@ static const struct mt7530_mib_desc mt7530_mib[] = { MIB_DESC(1, 0xb8, "RxArlDrop"), }; -static int -mt7623_trgmii_write(struct mt7530_priv *priv, u32 reg, u32 val) -{ - int ret; - - ret = regmap_write(priv->ethernet, TRGMII_BASE(reg), val); - if (ret < 0) - dev_err(priv->dev, - "failed to priv write register\n"); - return ret; -} - -static u32 -mt7623_trgmii_read(struct mt7530_priv *priv, u32 reg) -{ - int ret; - u32 val; - - ret = regmap_read(priv->ethernet, TRGMII_BASE(reg), &val); - if (ret < 0) { - dev_err(priv->dev, - "failed to priv read register\n"); - return ret; - } - - return val; -} - -static void -mt7623_trgmii_rmw(struct mt7530_priv *priv, u32 reg, - u32 mask, u32 set) -{ - u32 val; - - val = mt7623_trgmii_read(priv, reg); - val &= ~mask; - val |= set; - mt7623_trgmii_write(priv, reg, val); -} - -static void -mt7623_trgmii_set(struct mt7530_priv *priv, u32 reg, u32 val) -{ - mt7623_trgmii_rmw(priv, reg, 0, val); -} - -static void -mt7623_trgmii_clear(struct mt7530_priv *priv, u32 reg, u32 val) -{ - mt7623_trgmii_rmw(priv, reg, val, 0); -} - static int core_read_mmd_indirect(struct mt7530_priv *priv, int prtad, int devad) { @@ -530,27 +478,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, int mode) for (i = 0 ; i < NUM_TRGMII_CTRL; i++) mt7530_rmw(priv, MT7530_TRGMII_RD(i), RD_TAP_MASK, RD_TAP(16)); - else - if (priv->id != ID_MT7621) - mt7623_trgmii_set(priv, GSW_INTF_MODE, - INTF_MODE_TRGMII); - - return 0; -} - -static int -mt7623_pad_clk_setup(struct dsa_switch *ds) -{ - struct mt7530_priv *priv = ds->priv; - int i; - - for (i = 0 ; i < NUM_TRGMII_CTRL; i++) - mt7623_trgmii_write(priv, GSW_TRGMII_TD_ODT(i), - TD_DM_DRVP(8) | TD_DM_DRVN(8)); - - mt7623_trgmii_set(priv, GSW_TRGMII_RCK_CTRL, RX_RST | RXC_DQSISEL); - mt7623_trgmii_clear(priv, GSW_TRGMII_RCK_CTRL, RX_RST); - return 0; } @@ -1303,10 +1230,6 @@ mt7530_setup(struct dsa_switch *ds) dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; if (priv->id == ID_MT7530) { - priv->ethernet = syscon_node_to_regmap(dn); - if (IS_ERR(priv->ethernet)) - return PTR_ERR(priv->ethernet); - regulator_set_voltage(priv->core_pwr, 1000000, 1000000); ret = regulator_enable(priv->core_pwr); if (ret < 0) { @@ -1468,14 +1391,6 @@ static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port, /* Setup TX circuit incluing relevant PAD and driving */ mt7530_pad_clk_setup(ds, state->interface); - if (priv->id == ID_MT7530) { - /* Setup RX circuit, relevant PAD and driving on the - * host which must be placed after the setup on the - * device side is all finished. - */ - mt7623_pad_clk_setup(ds); - } - priv->p6_interface = state->interface; break; default: diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index ef9b52f3152b..4aef6024441b 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -277,7 +277,6 @@ enum mt7530_vlan_port_attr { /* Registers for TRGMII on the both side */ #define MT7530_TRGMII_RCK_CTRL 0x7a00 -#define GSW_TRGMII_RCK_CTRL 0x300 #define RX_RST BIT(31) #define RXC_DQSISEL BIT(30) #define DQSI1_TAP_MASK (0x7f << 8) @@ -286,31 +285,24 @@ enum mt7530_vlan_port_attr { #define DQSI0_TAP(x) ((x) & 0x7f) #define MT7530_TRGMII_RCK_RTT 0x7a04 -#define GSW_TRGMII_RCK_RTT 0x304 #define DQS1_GATE BIT(31) #define DQS0_GATE BIT(30) #define MT7530_TRGMII_RD(x) (0x7a10 + (x) * 8) -#define GSW_TRGMII_RD(x) (0x310 + (x) * 8) #define BSLIP_EN BIT(31) #define EDGE_CHK BIT(30) #define RD_TAP_MASK 0x7f #define RD_TAP(x) ((x) & 0x7f) -#define GSW_TRGMII_TXCTRL 0x340 #define MT7530_TRGMII_TXCTRL 0x7a40 #define TRAIN_TXEN BIT(31) #define TXC_INV BIT(30) #define TX_RST BIT(28) #define MT7530_TRGMII_TD_ODT(i) (0x7a54 + 8 * (i)) -#define GSW_TRGMII_TD_ODT(i) (0x354 + 8 * (i)) #define TD_DM_DRVP(x) ((x) & 0xf) #define TD_DM_DRVN(x) (((x) & 0xf) << 4) -#define GSW_INTF_MODE 0x390 -#define INTF_MODE_TRGMII BIT(1) - #define MT7530_TRGMII_TCK_CTRL 0x7a78 #define TCK_TAP(x) (((x) & 0xf) << 8) @@ -443,7 +435,6 @@ static const char *p5_intf_modes(unsigned int p5_interface) * @ds: The pointer to the dsa core structure * @bus: The bus used for the device and built-in PHY * @rstc: The pointer to reset control used by MCM - * @ethernet: The regmap used for access TRGMII-based registers * @core_pwr: The power supplied into the core * @io_pwr: The power supplied into the I/O * @reset: The descriptor for GPIO line tied to its reset pin @@ -460,7 +451,6 @@ struct mt7530_priv { struct dsa_switch *ds; struct mii_bus *bus; struct reset_control *rstc; - struct regmap *ethernet; struct regulator *core_pwr; struct regulator *io_pwr; struct gpio_desc *reset; -- cgit From a5d75538295b06bc6ade1b9da07b9bee57d1c677 Mon Sep 17 00:00:00 2001 From: René van Dorst Date: Mon, 6 Apr 2020 05:42:54 +0800 Subject: net: ethernet: mediatek: move mt7623 settings out off the mt7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving mt7623 logic out off mt7530, is required to make hardware setting consistent after we introduce phylink to mtk driver. Fixes: b8fc9f30821e ("net: ethernet: mediatek: Add basic PHYLINK support") Reviewed-by: Sean Wang Tested-by: Sean Wang Signed-off-by: René van Dorst Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 24 +++++++++++++++++++++++- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 8 ++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8d28f90acfe7..09047109d0da 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -65,6 +65,17 @@ u32 mtk_r32(struct mtk_eth *eth, unsigned reg) return __raw_readl(eth->base + reg); } +u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned reg) +{ + u32 val; + + val = mtk_r32(eth, reg); + val &= ~mask; + val |= set; + mtk_w32(eth, val, reg); + return reg; +} + static int mtk_mdio_busy_wait(struct mtk_eth *eth) { unsigned long t_start = jiffies; @@ -193,7 +204,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, struct mtk_mac *mac = container_of(config, struct mtk_mac, phylink_config); struct mtk_eth *eth = mac->hw; - u32 mcr_cur, mcr_new, sid; + u32 mcr_cur, mcr_new, sid, i; int val, ge_mode, err; /* MT76x8 has no hardware settings between for the MAC */ @@ -255,6 +266,17 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, PHY_INTERFACE_MODE_TRGMII) mtk_gmac0_rgmii_adjust(mac->hw, state->speed); + + /* mt7623_pad_clk_setup */ + for (i = 0 ; i < NUM_TRGMII_CTRL; i++) + mtk_w32(mac->hw, + TD_DM_DRVP(8) | TD_DM_DRVN(8), + TRGMII_TD_ODT(i)); + + /* Assert/release MT7623 RXC reset */ + mtk_m32(mac->hw, 0, RXC_RST | RXC_DQSISEL, + TRGMII_RCK_CTRL); + mtk_m32(mac->hw, RXC_RST, 0, TRGMII_RCK_CTRL); } } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 85830fe14a1b..454cfcd465fd 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -352,10 +352,13 @@ #define DQSI0(x) ((x << 0) & GENMASK(6, 0)) #define DQSI1(x) ((x << 8) & GENMASK(14, 8)) #define RXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define RXC_RST BIT(31) #define RXC_DQSISEL BIT(30) #define RCK_CTRL_RGMII_1000 (RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16)) #define RCK_CTRL_RGMII_10_100 RXCTL_DMWTLAT(2) +#define NUM_TRGMII_CTRL 5 + /* TRGMII RXC control register */ #define TRGMII_TCK_CTRL 0x10340 #define TXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) @@ -363,6 +366,11 @@ #define TCK_CTRL_RGMII_1000 TXCTL_DMWTLAT(2) #define TCK_CTRL_RGMII_10_100 (TXC_INV | TXCTL_DMWTLAT(2)) +/* TRGMII TX Drive Strength */ +#define TRGMII_TD_ODT(i) (0x10354 + 8 * (i)) +#define TD_DM_DRVP(x) ((x) & 0xf) +#define TD_DM_DRVN(x) (((x) & 0xf) << 4) + /* TRGMII Interface mode register */ #define INTF_MODE 0x10390 #define TRGMII_INTF_DIS BIT(0) -- cgit From a4837980fd9fa4c70a821d11831698901baef56b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 6 Apr 2020 14:39:32 +0300 Subject: net: revert default NAPI poll timeout to 2 jiffies For HZ < 1000 timeout 2000us rounds up to 1 jiffy but expires randomly because next timer interrupt could come shortly after starting softirq. For commonly used CONFIG_HZ=1000 nothing changes. Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") Reported-by: Dmitry Yakunin Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9c9e763bfe0e..df8097b8e286 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4140,7 +4140,8 @@ EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; -unsigned int __read_mostly netdev_budget_usecs = 2000; +/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ +unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ -- cgit From a080da6ac7fa13282f1be8705cc67ceacd999ac3 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 6 Apr 2020 18:36:56 +0300 Subject: net: sched: Fix setting last executed chain on skb extension After driver sets the missed chain on the tc skb extension it is consumed (deleted) by tc_classify_ingress and tc jumps to that chain. If tc now misses on this chain (either no match, or no goto action), then last executed chain remains 0, and the skb extension is not re-added, and the next datapath (ovs) will start from 0. Fix that by setting last executed chain to the chain read from the skb extension, so if there is a miss, we set it back. Fixes: af699626ee26 ("net: sched: Support specifying a starting chain via tc skb ext") Reviewed-by: Oz Shlomo Reviewed-by: Jiri Pirko Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f6a3b969ead0..55bd1429678f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1667,6 +1667,7 @@ int tcf_classify_ingress(struct sk_buff *skb, skb_ext_del(skb, TC_SKB_EXT); tp = rcu_dereference_bh(fchain->filter_chain); + last_executed_chain = fchain->index; } ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, -- cgit From ab74110205543a8d57eff64c6af64235aa23c09b Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Mon, 6 Apr 2020 21:29:20 +0000 Subject: Documentation: mdio_bus.c - fix warnings Fix wrong parameter description and related warnings at 'make htmldocs'. Signed-off-by: Lothar Rubusch Reviewed-by: Florian Fainelli Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 522760c8bca6..7a4eb3f2cb74 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -464,7 +464,7 @@ static struct class mdio_bus_class = { /** * mdio_find_bus - Given the name of a mdiobus, find the mii_bus. - * @mdio_bus_np: Pointer to the mii_bus. + * @mdio_name: The name of a mdiobus. * * Returns a reference to the mii_bus, or NULL if none found. The * embedded struct device will have its reference count incremented, -- cgit From 4faab8c446def7667adf1f722456c2f4c304069c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 7 Apr 2020 13:23:21 +0000 Subject: hsr: check protocol version in hsr_newlink() In the current hsr code, only 0 and 1 protocol versions are valid. But current hsr code doesn't check the version, which is received by userspace. Test commands: ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add hsr0 type hsr slave1 dummy0 slave2 dummy1 version 4 In the test commands, version 4 is invalid. So, the command should be failed. After this patch, following error will occur. "Error: hsr: Only versions 0..1 are supported." Fixes: ee1c27977284 ("net/hsr: Added support for HSR v1") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_netlink.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 5465a395da04..1decb25f6764 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -69,10 +69,16 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); - if (!data[IFLA_HSR_VERSION]) + if (!data[IFLA_HSR_VERSION]) { hsr_version = 0; - else + } else { hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); + if (hsr_version > 1) { + NL_SET_ERR_MSG_MOD(extack, + "Only versions 0..1 are supported"); + return -EINVAL; + } + } return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack); } -- cgit From cb9533d1c683219bc982905046c05e24bfaf4996 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Tue, 7 Apr 2020 13:13:25 -0400 Subject: tc-testing: remove duplicate code in tdc.py In set_operation_mode() function remove duplicated check for args.list parameter, which is already done one line before. Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index e566c70e64a1..a3e43189d940 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -713,9 +713,8 @@ def set_operation_mode(pm, parser, args, remaining): exit(0) if args.list: - if args.list: - list_test_cases(alltests) - exit(0) + list_test_cases(alltests) + exit(0) if len(alltests): req_plugins = pm.get_required_plugins(alltests) -- cgit From 7e4d47596b686bb2714d05f8774ada884ec8983d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 8 Apr 2020 09:19:11 -0700 Subject: ionic: replay filters after fw upgrade The NIC's filters are lost in the midst of the fw-upgrade so we need to replay them into the FW. We also remove the unused ionic_rx_filter_del() function. Fixes: c672412f6172 ("ionic: remove lifs on fw reset") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 12 +++-- .../net/ethernet/pensando/ionic/ionic_rx_filter.c | 52 +++++++++++++++++----- .../net/ethernet/pensando/ionic/ionic_rx_filter.h | 2 +- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 4b8a76098ca3..f8f437aec027 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2127,6 +2127,8 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) if (lif->registered) ionic_lif_set_netdev_info(lif); + ionic_rx_filter_replay(lif); + if (netif_running(lif->netdev)) { err = ionic_txrx_alloc(lif); if (err) @@ -2206,9 +2208,9 @@ static void ionic_lif_deinit(struct ionic_lif *lif) if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { cancel_work_sync(&lif->deferred.work); cancel_work_sync(&lif->tx_timeout_work); + ionic_rx_filters_deinit(lif); } - ionic_rx_filters_deinit(lif); if (lif->netdev->features & NETIF_F_RXHASH) ionic_lif_rss_deinit(lif); @@ -2421,9 +2423,11 @@ static int ionic_lif_init(struct ionic_lif *lif) if (err) goto err_out_notifyq_deinit; - err = ionic_rx_filters_init(lif); - if (err) - goto err_out_notifyq_deinit; + if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { + err = ionic_rx_filters_init(lif); + if (err) + goto err_out_notifyq_deinit; + } err = ionic_station_set(lif); if (err) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 7a093f148ee5..f3c7dd1596ee 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -17,17 +17,49 @@ void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f) devm_kfree(dev, f); } -int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f) +void ionic_rx_filter_replay(struct ionic_lif *lif) { - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_filter_del = { - .opcode = IONIC_CMD_RX_FILTER_DEL, - .filter_id = cpu_to_le32(f->filter_id), - }, - }; - - return ionic_adminq_post_wait(lif, &ctx); + struct ionic_rx_filter_add_cmd *ac; + struct ionic_admin_ctx ctx; + struct ionic_rx_filter *f; + struct hlist_head *head; + struct hlist_node *tmp; + unsigned int i; + int err = 0; + + ac = &ctx.cmd.rx_filter_add; + + for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) { + head = &lif->rx_filters.by_id[i]; + hlist_for_each_entry_safe(f, tmp, head, by_id) { + ctx.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work); + memcpy(ac, &f->cmd, sizeof(f->cmd)); + dev_dbg(&lif->netdev->dev, "replay filter command:\n"); + dynamic_hex_dump("cmd ", DUMP_PREFIX_OFFSET, 16, 1, + &ctx.cmd, sizeof(ctx.cmd), true); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err) { + switch (le16_to_cpu(ac->match)) { + case IONIC_RX_FILTER_MATCH_VLAN: + netdev_info(lif->netdev, "Replay failed - %d: vlan %d\n", + err, + le16_to_cpu(ac->vlan.vlan)); + break; + case IONIC_RX_FILTER_MATCH_MAC: + netdev_info(lif->netdev, "Replay failed - %d: mac %pM\n", + err, ac->mac.addr); + break; + case IONIC_RX_FILTER_MATCH_MAC_VLAN: + netdev_info(lif->netdev, "Replay failed - %d: vlan %d mac %pM\n", + err, + le16_to_cpu(ac->vlan.vlan), + ac->mac.addr); + break; + } + } + } + } } int ionic_rx_filters_init(struct ionic_lif *lif) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h index b6aec9c19918..cf8f4c0a961c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h @@ -24,7 +24,7 @@ struct ionic_rx_filters { }; void ionic_rx_filter_free(struct ionic_lif *lif, struct ionic_rx_filter *f); -int ionic_rx_filter_del(struct ionic_lif *lif, struct ionic_rx_filter *f); +void ionic_rx_filter_replay(struct ionic_lif *lif); int ionic_rx_filters_init(struct ionic_lif *lif); void ionic_rx_filters_deinit(struct ionic_lif *lif); int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, -- cgit From 216902ae770e21e739cd3b530b0b3ab3c28641d3 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 8 Apr 2020 09:19:12 -0700 Subject: ionic: set station addr only if needed The code was working too hard and in some cases was trying to delete filters that weren't there, generating a potentially misleading error message. IONIC_CMD_RX_FILTER_DEL (32) failed: IONIC_RC_ENOENT (-2) Fixes: 2a654540be10 ("ionic: Add Rx filter and rx_mode ndo support") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 32 +++++++++++++++---------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f8f437aec027..5acf4f46c268 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2341,24 +2341,30 @@ static int ionic_station_set(struct ionic_lif *lif) err = ionic_adminq_post_wait(lif, &ctx); if (err) return err; - + netdev_dbg(lif->netdev, "found initial MAC addr %pM\n", + ctx.comp.lif_getattr.mac); if (is_zero_ether_addr(ctx.comp.lif_getattr.mac)) return 0; - memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); - addr.sa_family = AF_INET; - err = eth_prepare_mac_addr_change(netdev, &addr); - if (err) { - netdev_warn(lif->netdev, "ignoring bad MAC addr from NIC %pM - err %d\n", - addr.sa_data, err); - return 0; - } + if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) { + memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); + addr.sa_family = AF_INET; + err = eth_prepare_mac_addr_change(netdev, &addr); + if (err) { + netdev_warn(lif->netdev, "ignoring bad MAC addr from NIC %pM - err %d\n", + addr.sa_data, err); + return 0; + } - netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", - netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, false); + if (!is_zero_ether_addr(netdev->dev_addr)) { + netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", + netdev->dev_addr); + ionic_lif_addr(lif, netdev->dev_addr, false); + } + + eth_commit_mac_addr_change(netdev, &addr); + } - eth_commit_mac_addr_change(netdev, &addr); netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); ionic_lif_addr(lif, netdev->dev_addr, true); -- cgit From 2abe05234f2e892728c388169631e4b99f354c86 Mon Sep 17 00:00:00 2001 From: Michael Weiß Date: Tue, 7 Apr 2020 13:11:48 +0200 Subject: l2tp: Allow management of tunnels and session in user namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creation and management of L2TPv3 tunnels and session through netlink requires CAP_NET_ADMIN. However, a process with CAP_NET_ADMIN in a non-initial user namespace gets an EPERM due to the use of the genetlink GENL_ADMIN_PERM flag. Thus, management of L2TP VPNs inside an unprivileged container won't work. We replaced the GENL_ADMIN_PERM by the GENL_UNS_ADMIN_PERM flag similar to other network modules which also had this problem, e.g., openvswitch (commit 4a92602aa1cd "openvswitch: allow management from inside user namespaces") and nl80211 (commit 5617c6cd6f844 "nl80211: Allow privileged operations from user namespaces"). I tested this in the container runtime trustm3 (trustm3.github.io) and was able to create l2tp tunnels and sessions in unpriviliged (user namespaced) containers using a private network namespace. For other runtimes such as docker or lxc this should work, too. Signed-off-by: Michael Weiß Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f5a9bdc4980c..ebb381c3f1b9 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -920,51 +920,51 @@ static const struct genl_ops l2tp_nl_ops[] = { .cmd = L2TP_CMD_TUNNEL_CREATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_create, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_DELETE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_delete, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_MODIFY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_modify, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_get, .dumpit = l2tp_nl_cmd_tunnel_dump, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_CREATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_create, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_DELETE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_delete, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_MODIFY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_modify, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_get, .dumpit = l2tp_nl_cmd_session_dump, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, }; -- cgit From f691a25ce5e5e405156ad4091c8e660b2622b7ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Apr 2020 20:54:43 +0200 Subject: net/tls: fix const assignment warning Building with some experimental patches, I came across a warning in the tls code: include/linux/compiler.h:215:30: warning: assignment discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] 215 | *(volatile typeof(x) *)&(x) = (val); \ | ^ net/tls/tls_main.c:650:4: note: in expansion of macro 'smp_store_release' 650 | smp_store_release(&saved_tcpv4_prot, prot); This appears to be a legitimate warning about assigning a const pointer into the non-const 'saved_tcpv4_prot' global. Annotate both the ipv4 and ipv6 pointers 'const' to make the code internally consistent. Fixes: 5bb4c45d466c ("net/tls: Read sk_prot once when building tls proto ops") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/tls/tls_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 156efce50dbd..0e989005bdc2 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -56,9 +56,9 @@ enum { TLS_NUM_PROTS, }; -static struct proto *saved_tcpv6_prot; +static const struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); -static struct proto *saved_tcpv4_prot; +static const struct proto *saved_tcpv4_prot; static DEFINE_MUTEX(tcpv4_prot_mutex); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; -- cgit From 8c702a53bb0a79bfa203ba21ef1caba43673c5b7 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 30 Mar 2020 10:21:49 +0300 Subject: net/mlx5: Fix frequent ioread PCI access during recovery High frequency of PCI ioread calls during recovery flow may cause the following trace on powerpc: [ 248.670288] EEH: 2100000 reads ignored for recovering device at location=Slot1 driver=mlx5_core pci addr=0000:01:00.1 [ 248.670331] EEH: Might be infinite loop in mlx5_core driver [ 248.670361] CPU: 2 PID: 35247 Comm: kworker/u192:11 Kdump: loaded Tainted: G OE ------------ 4.14.0-115.14.1.el7a.ppc64le #1 [ 248.670425] Workqueue: mlx5_health0000:01:00.1 health_recover_work [mlx5_core] [ 248.670471] Call Trace: [ 248.670492] [c00020391c11b960] [c000000000c217ac] dump_stack+0xb0/0xf4 (unreliable) [ 248.670548] [c00020391c11b9a0] [c000000000045818] eeh_check_failure+0x5c8/0x630 [ 248.670631] [c00020391c11ba50] [c00000000068fce4] ioread32be+0x114/0x1c0 [ 248.670692] [c00020391c11bac0] [c00800000dd8b400] mlx5_error_sw_reset+0x160/0x510 [mlx5_core] [ 248.670752] [c00020391c11bb60] [c00800000dd75824] mlx5_disable_device+0x34/0x1d0 [mlx5_core] [ 248.670822] [c00020391c11bbe0] [c00800000dd8affc] health_recover_work+0x11c/0x3c0 [mlx5_core] [ 248.670891] [c00020391c11bc80] [c000000000164fcc] process_one_work+0x1bc/0x5f0 [ 248.670955] [c00020391c11bd20] [c000000000167f8c] worker_thread+0xac/0x6b0 [ 248.671015] [c00020391c11bdc0] [c000000000171618] kthread+0x168/0x1b0 [ 248.671067] [c00020391c11be30] [c00000000000b65c] ret_from_kernel_thread+0x5c/0x80 Reduce the PCI ioread frequency during recovery by using msleep() instead of cond_resched() Fixes: 3e5b72ac2f29 ("net/mlx5: Issue SW reset on FW assert") Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index fa1665caac46..f99e1752d4e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -243,7 +243,7 @@ recover_from_sw_reset: if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) break; - cond_resched(); + msleep(20); } while (!time_after(jiffies, end)); if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { -- cgit From 84be2fdae4f80b7388f754fe49149374a41725f2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 29 Mar 2020 07:10:43 +0300 Subject: net/mlx5: Fix condition for termination table cleanup When we destroy rules from slow path we need to avoid destroying termination tables since termination tables are never created in slow path. By doing so we avoid destroying the termination table created for the slow path. Fixes: d8a2034f152a ("net/mlx5: Don't use termination tables in slow path") Signed-off-by: Eli Cohen Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 +++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 39f42f985fbd..c1848b57f61c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -403,7 +403,6 @@ enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), - MLX5_ESW_ATTR_FLAG_HAIRPIN = BIT(3), }; struct mlx5_esw_flow_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f171eb2234b0..b2e38e0cde97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -300,7 +300,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; - bool hairpin = false; int j, i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) @@ -398,21 +397,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) { + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, &flow_act, dest, i); - hairpin = true; - } else { + else rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); - } if (IS_ERR(rule)) goto err_add_rule; else atomic64_inc(&esw->offloads.num_flows); - if (hairpin) - attr->flags |= MLX5_ESW_ATTR_FLAG_HAIRPIN; - return rule; err_add_rule: @@ -501,7 +495,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, mlx5_del_flow_rules(rule); - if (attr->flags & MLX5_ESW_ATTR_FLAG_HAIRPIN) { + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { /* unref the term table */ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { if (attr->dests[i].termtbl) -- cgit From d19987ccf57501894fdd8fadc2e55e4a3dd57239 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 24 Mar 2020 15:04:26 +0200 Subject: net/mlx5e: Add missing release firmware call Once driver finishes flashing the firmware image, it should release it. Fixes: 9c8bca2637b8 ("mlx5: Move firmware flash implementation to devlink") Signed-off-by: Eran Ben Elisha Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index bdeb291f6b67..e94f0c4d74a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -23,7 +23,10 @@ static int mlx5_devlink_flash_update(struct devlink *devlink, if (err) return err; - return mlx5_firmware_flash(dev, fw, extack); + err = mlx5_firmware_flash(dev, fw, extack); + release_firmware(fw); + + return err; } static u8 mlx5_fw_ver_major(u32 version) -- cgit From 70f478ca085deec4d6c1f187f773f5827ddce7e8 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 1 Apr 2020 14:41:27 +0300 Subject: net/mlx5e: Fix nest_level for vlan pop action Current value of nest_level, assigned from net_device lower_level value, does not reflect the actual number of vlan headers, needed to pop. For ex., if we have untagged ingress traffic sended over vlan devices, instead of one pop action, driver will perform two pop actions. To fix that, calculate nest_level as difference between vlan device and parent device lower_levels. Fixes: f3b0a18bb6cb ("net: remove unnecessary variables and callback") Signed-off-by: Dmytro Linkin Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 438128dde187..e3fee837c7a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3558,12 +3558,13 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr, u32 *action) { - int nest_level = attr->parse_attr->filter_dev->lower_level; struct flow_action_entry vlan_act = { .id = FLOW_ACTION_VLAN_POP, }; - int err = 0; + int nest_level, err = 0; + nest_level = attr->parse_attr->filter_dev->lower_level - + priv->netdev->lower_level; while (nest_level--) { err = parse_tc_vlan_action(priv, &vlan_act, attr, action); if (err) -- cgit From d5a3c2b640093c8a4bb5d76170a8f6c8c2eacc17 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 29 Mar 2020 18:54:10 +0300 Subject: net/mlx5e: Fix missing pedit action after ct clear action With ct clear action we should not allocate the action in hw and not release the mod_acts parsed in advance. It will be done when handling the ct clear action. Fixes: 1ef3018f5af3 ("net/mlx5e: CT: Support clear action") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3fee837c7a3..a574c588269a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1343,7 +1343,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (err) return err; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); if (err) -- cgit From 7482d9cb5b974b7ad1a58fa8714f7a8c05b5d278 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 3 Apr 2020 03:57:30 -0500 Subject: net/mlx5e: Fix pfnum in devlink port attribute Cited patch missed to extract PCI pf number accurately for PF and VF port flavour. It considered PCI device + function number. Due to this, device having non zero device number shown large pfnum. Hence, use only PCI function number; to avoid similar errors, derive pfnum one time for all port flavours. Fixes: f60f315d339e ("net/mlx5e: Register devlink ports for physical link, PCI PF, VFs") Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2a0243e4af75..55457f268495 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -2050,29 +2050,30 @@ static int register_devlink_port(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep = rpriv->rep; struct netdev_phys_item_id ppid = {}; unsigned int dl_port_index = 0; + u16 pfnum; if (!is_devlink_port_supported(dev, rpriv)) return 0; mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); + pfnum = PCI_FUNC(dev->pdev->devfn); if (rep->vport == MLX5_VPORT_UPLINK) { devlink_port_attrs_set(&rpriv->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - PCI_FUNC(dev->pdev->devfn), false, 0, + pfnum, false, 0, &ppid.id[0], ppid.id_len); dl_port_index = vport_to_devlink_port_index(dev, rep->vport); } else if (rep->vport == MLX5_VPORT_PF) { devlink_port_attrs_pci_pf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, - dev->pdev->devfn); + pfnum); dl_port_index = rep->vport; } else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) { devlink_port_attrs_pci_vf_set(&rpriv->dl_port, &ppid.id[0], ppid.id_len, - dev->pdev->devfn, - rep->vport - 1); + pfnum, rep->vport - 1); dl_port_index = vport_to_devlink_port_index(dev, rep->vport); } -- cgit From 230a1bc2470c5554a8c2bfe14774863897dc9386 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 3 Apr 2020 02:35:46 -0500 Subject: net/mlx5e: Fix devlink port netdev unregistration sequence In cited commit netdevice is registered after devlink port. Unregistration flow should be mirror sequence of registration flow. Hence, unregister netdevice before devlink port. Fixes: 31e87b39ba9d ("net/mlx5e: Fix devlink port register sequence") Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dd7f338425eb..f02150a97ac8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5526,8 +5526,8 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif - mlx5e_devlink_port_unregister(priv); unregister_netdev(priv->netdev); + mlx5e_devlink_port_unregister(priv); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); } -- cgit From 9808dd0a2aeebcb72239a3b082159b0186d9ac3d Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Fri, 27 Mar 2020 12:12:31 +0300 Subject: net/mlx5e: CT: Use rhashtable's ct entries instead of a separate list Fixes CT entries list corruption. After allowing parallel insertion/removals in upper nf flow table layer, unprotected ct entries list can be corrupted by parallel add/del on the same flow table. CT entries list is only used while freeing a ct zone flow table to go over all the ct entries offloaded on that zone/table, and flush the table. As rhashtable already provides an api to go over all the inserted entries, fix the race by using the rhashtable iteration instead, and remove the list. Fixes: 7da182a998d6 ("netfilter: flowtable: Use work entry per offload command") Reviewed-by: Oz Shlomo Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index ad3e3a65d403..16416eaac39e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -67,11 +67,9 @@ struct mlx5_ct_ft { struct nf_flowtable *nf_ft; struct mlx5_tc_ct_priv *ct_priv; struct rhashtable ct_entries_ht; - struct list_head ct_entries_list; }; struct mlx5_ct_entry { - struct list_head list; u16 zone; struct rhash_head node; struct flow_rule *flow_rule; @@ -617,8 +615,6 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_insert; - list_add(&entry->list, &ft->ct_entries_list); - return 0; err_insert: @@ -646,7 +642,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params)); - list_del(&entry->list); kfree(entry); return 0; @@ -818,7 +813,6 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, ft->zone = zone; ft->nf_ft = nf_ft; ft->ct_priv = ct_priv; - INIT_LIST_HEAD(&ft->ct_entries_list); refcount_set(&ft->refcount, 1); err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); @@ -847,12 +841,12 @@ err_init: } static void -mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) +mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) { - struct mlx5_ct_entry *entry; + struct mlx5_tc_ct_priv *ct_priv = arg; + struct mlx5_ct_entry *entry = ptr; - list_for_each_entry(entry, &ft->ct_entries_list, list) - mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry); + mlx5_tc_ct_entry_del_rules(ct_priv, entry); } static void @@ -863,9 +857,10 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) nf_flow_table_offload_del_cb(ft->nf_ft, mlx5_tc_ct_block_flow_offload, ft); - mlx5_tc_ct_flush_ft(ct_priv, ft); rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); - rhashtable_destroy(&ft->ct_entries_ht); + rhashtable_free_and_destroy(&ft->ct_entries_ht, + mlx5_tc_ct_flush_ft_entry, + ct_priv); kfree(ft); } -- cgit From 8e368dc72e86ad1e1a612416f32d5ad22dca88bc Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 7 Apr 2020 20:35:40 -0700 Subject: bpf: Fix use of sk->sk_reuseport from sk_assign In testing, we found that for request sockets the sk->sk_reuseport field may yet be uninitialized, which caused bpf_sk_assign() to randomly succeed or return -ESOCKTNOSUPPORT when handling the forward ACK in a three-way handshake. Fix it by only applying the reuseport check for full sockets. Fixes: cf7fbe660f2d ("bpf: Add socket assign support") Signed-off-by: Joe Stringer Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200408033540.10339-1-joe@wand.net.nz --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 7628b947dbc3..7d6ceaa54d21 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5925,7 +5925,7 @@ BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags) return -EOPNOTSUPP; if (unlikely(dev_net(skb->dev) != sock_net(sk))) return -ENETUNREACH; - if (unlikely(sk->sk_reuseport)) + if (unlikely(sk_fullsock(sk) && sk->sk_reuseport)) return -ESOCKTNOSUPPORT; if (sk_is_refcounted(sk) && unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) -- cgit From bb9562cf5c67813034c96afb50bd21130a504441 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Wed, 8 Apr 2020 18:12:29 +0000 Subject: arm, bpf: Fix bugs with ALU64 {RSH, ARSH} BPF_K shift by 0 The current arm BPF JIT does not correctly compile RSH or ARSH when the immediate shift amount is 0. This causes the "rsh64 by 0 imm" and "arsh64 by 0 imm" BPF selftests to hang the kernel by reaching an instruction the verifier determines to be unreachable. The root cause is in how immediate right shifts are encoded on arm. For LSR and ASR (logical and arithmetic right shift), a bit-pattern of 00000 in the immediate encodes a shift amount of 32. When the BPF immediate is 0, the generated code shifts by 32 instead of the expected behavior (a no-op). This patch fixes the bugs by adding an additional check if the BPF immediate is 0. After the change, the above mentioned BPF selftests pass. Fixes: 39c13c204bb11 ("arm: eBPF JIT compiler") Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200408181229.10909-1-luke.r.nels@gmail.com --- arch/arm/net/bpf_jit_32.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index cc29869d12a3..d124f78e20ac 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -929,7 +929,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); @@ -955,7 +959,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); -- cgit From e750b84dc9c5510b89d9f52695f9c2f51e73eaab Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Wed, 8 Apr 2020 22:09:31 +0000 Subject: Documentation: devlink: fix broken link warning At 'make htmldocs' the following warning is thrown: Documentation/networking/devlink/devlink-trap.rst:302: WARNING: undefined label: generic-packet-trap-groups Fixes the warning by setting the label to the specified header, within the same document. Signed-off-by: Lothar Rubusch Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-trap.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index a09971c2115c..fe089acb7783 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -257,6 +257,8 @@ drivers: * :doc:`netdevsim` * :doc:`mlxsw` +.. _Generic-Packet-Trap-Groups: + Generic Packet Trap Groups ========================== -- cgit From 6dbf02acef69b0742c238574583b3068afbd227c Mon Sep 17 00:00:00 2001 From: Wang Wenhu Date: Wed, 8 Apr 2020 19:53:53 -0700 Subject: net: qrtr: send msgs from local of same id as broadcast If the local node id(qrtr_local_nid) is not modified after its initialization, it equals to the broadcast node id(QRTR_NODE_BCAST). So the messages from local node should not be taken as broadcast and keep the process going to send them out anyway. The definitions are as follow: static unsigned int qrtr_local_nid = NUMA_NO_NODE; Fixes: fdf5fd397566 ("net: qrtr: Broadcast messages only from control port") Signed-off-by: Wang Wenhu Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index e22092e4a783..7ed31b5e77e4 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -906,20 +906,21 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) node = NULL; if (addr->sq_node == QRTR_NODE_BCAST) { - enqueue_fn = qrtr_bcast_enqueue; - if (addr->sq_port != QRTR_PORT_CTRL) { + if (addr->sq_port != QRTR_PORT_CTRL && + qrtr_local_nid != QRTR_NODE_BCAST) { release_sock(sk); return -ENOTCONN; } + enqueue_fn = qrtr_bcast_enqueue; } else if (addr->sq_node == ipc->us.sq_node) { enqueue_fn = qrtr_local_enqueue; } else { - enqueue_fn = qrtr_node_enqueue; node = qrtr_node_lookup(addr->sq_node); if (!node) { release_sock(sk); return -ECONNRESET; } + enqueue_fn = qrtr_node_enqueue; } plen = (len + 3) & ~3; -- cgit From 5f0224a6ddc3101ab9664a5f7a6287047934a079 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 9 Apr 2020 14:41:26 +0100 Subject: net-sysfs: remove redundant assignment to variable ret The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index cf0215734ceb..4773ad6ec111 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -80,7 +80,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); struct net *net = dev_net(netdev); unsigned long new; - int ret = -EINVAL; + int ret; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; -- cgit From 022e9d6090599c0593c78e87dc9ba98a290e6bc4 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 9 Apr 2020 14:08:08 +0000 Subject: net: macsec: fix using wrong structure in macsec_changelink() In the macsec_changelink(), "struct macsec_tx_sa tx_sc" is used to store "macsec_secy.tx_sc". But, the struct type of tx_sc is macsec_tx_sc, not macsec_tx_sa. So, the macsec_tx_sc should be used instead. Test commands: ip link add dummy0 type dummy ip link add macsec0 link dummy0 type macsec ip link set macsec0 type macsec encrypt off Splat looks like: [61119.963483][ T9335] ================================================================== [61119.964709][ T9335] BUG: KASAN: slab-out-of-bounds in macsec_changelink.part.34+0xb6/0x200 [macsec] [61119.965787][ T9335] Read of size 160 at addr ffff888020d69c68 by task ip/9335 [61119.966699][ T9335] [61119.966979][ T9335] CPU: 0 PID: 9335 Comm: ip Not tainted 5.6.0+ #503 [61119.967791][ T9335] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [61119.968914][ T9335] Call Trace: [61119.969324][ T9335] dump_stack+0x96/0xdb [61119.969809][ T9335] ? macsec_changelink.part.34+0xb6/0x200 [macsec] [61119.970554][ T9335] print_address_description.constprop.5+0x1be/0x360 [61119.971294][ T9335] ? macsec_changelink.part.34+0xb6/0x200 [macsec] [61119.971973][ T9335] ? macsec_changelink.part.34+0xb6/0x200 [macsec] [61119.972703][ T9335] __kasan_report+0x12a/0x170 [61119.973323][ T9335] ? macsec_changelink.part.34+0xb6/0x200 [macsec] [61119.973942][ T9335] kasan_report+0xe/0x20 [61119.974397][ T9335] check_memory_region+0x149/0x1a0 [61119.974866][ T9335] memcpy+0x1f/0x50 [61119.975209][ T9335] macsec_changelink.part.34+0xb6/0x200 [macsec] [61119.975825][ T9335] ? macsec_get_stats64+0x3e0/0x3e0 [macsec] [61119.976451][ T9335] ? kernel_text_address+0x111/0x120 [61119.976990][ T9335] ? pskb_expand_head+0x25f/0xe10 [61119.977503][ T9335] ? stack_trace_save+0x82/0xb0 [61119.977986][ T9335] ? memset+0x1f/0x40 [61119.978397][ T9335] ? __nla_validate_parse+0x98/0x1ab0 [61119.978936][ T9335] ? macsec_alloc_tfm+0x90/0x90 [macsec] [61119.979511][ T9335] ? __kasan_slab_free+0x111/0x150 [61119.980021][ T9335] ? kfree+0xce/0x2f0 [61119.980700][ T9335] ? netlink_trim+0x196/0x1f0 [61119.981420][ T9335] ? nla_memcpy+0x90/0x90 [61119.982036][ T9335] ? register_lock_class+0x19e0/0x19e0 [61119.982776][ T9335] ? memcpy+0x34/0x50 [61119.983327][ T9335] __rtnl_newlink+0x922/0x1270 [ ... ] Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 0d580d81d910..a183250ff66a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3809,7 +3809,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct macsec_dev *macsec = macsec_priv(dev); - struct macsec_tx_sa tx_sc; + struct macsec_tx_sc tx_sc; struct macsec_secy secy; int ret; -- cgit From e228a5d05e9ee25878e9a40de96e7ceb579d4893 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 8 Apr 2020 03:21:01 -0700 Subject: net/rds: Replace struct rds_mr's r_refcount with struct kref And removed rds_mr_put(). Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/message.c | 6 +++--- net/rds/rdma.c | 28 +++++++++++++++------------- net/rds/rds.h | 9 ++------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 50f13f1d4ae0..bbecb8cb873e 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -162,12 +162,12 @@ static void rds_message_purge(struct rds_message *rm) if (rm->rdma.op_active) rds_rdma_free_op(&rm->rdma); if (rm->rdma.op_rdma_mr) - rds_mr_put(rm->rdma.op_rdma_mr); + kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final); if (rm->atomic.op_active) rds_atomic_free_op(&rm->atomic); if (rm->atomic.op_rdma_mr) - rds_mr_put(rm->atomic.op_rdma_mr); + kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final); } void rds_message_put(struct rds_message *rm) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 585e6b3b69ce..f828b66978e4 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2020 Oracle and/or its affiliates. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key, if (insert) { rb_link_node(&insert->r_rb_node, parent, p); rb_insert_color(&insert->r_rb_node, root); - refcount_inc(&insert->r_refcount); + kref_get(&insert->r_kref); } return NULL; } @@ -99,7 +99,7 @@ static void rds_destroy_mr(struct rds_mr *mr) unsigned long flags; rdsdebug("RDS: destroy mr key is %x refcnt %u\n", - mr->r_key, refcount_read(&mr->r_refcount)); + mr->r_key, kref_read(&mr->r_kref)); if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) return; @@ -115,8 +115,10 @@ static void rds_destroy_mr(struct rds_mr *mr) mr->r_trans->free_mr(trans_private, mr->r_invalidate); } -void __rds_put_mr_final(struct rds_mr *mr) +void __rds_put_mr_final(struct kref *kref) { + struct rds_mr *mr = container_of(kref, struct rds_mr, r_kref); + rds_destroy_mr(mr); kfree(mr); } @@ -141,7 +143,7 @@ void rds_rdma_drop_keys(struct rds_sock *rs) RB_CLEAR_NODE(&mr->r_rb_node); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); spin_lock_irqsave(&rs->rs_rdma_lock, flags); } spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); @@ -242,7 +244,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, goto out; } - refcount_set(&mr->r_refcount, 1); + kref_init(&mr->r_kref); RB_CLEAR_NODE(&mr->r_rb_node); mr->r_trans = rs->rs_transport; mr->r_sock = rs; @@ -343,7 +345,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, rdsdebug("RDS: get_mr key is %x\n", mr->r_key); if (mr_ret) { - refcount_inc(&mr->r_refcount); + kref_get(&mr->r_kref); *mr_ret = mr; } @@ -351,7 +353,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, out: kfree(pages); if (mr) - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); return ret; } @@ -440,7 +442,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) * someone else drops their ref. */ rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); return 0; } @@ -481,7 +483,7 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) * trigger an async flush. */ if (zot_me) { rds_destroy_mr(mr); - rds_mr_put(mr); + kref_put(&mr->r_kref, __rds_put_mr_final); } } @@ -490,7 +492,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro) unsigned int i; if (ro->op_odp_mr) { - rds_mr_put(ro->op_odp_mr); + kref_put(&ro->op_odp_mr->r_kref, __rds_put_mr_final); } else { for (i = 0; i < ro->op_nents; i++) { struct page *page = sg_page(&ro->op_sg[i]); @@ -730,7 +732,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, goto out_pages; } RB_CLEAR_NODE(&local_odp_mr->r_rb_node); - refcount_set(&local_odp_mr->r_refcount, 1); + kref_init(&local_odp_mr->r_kref); local_odp_mr->r_trans = rs->rs_transport; local_odp_mr->r_sock = rs; local_odp_mr->r_trans_private = @@ -827,7 +829,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, if (!mr) err = -EINVAL; /* invalid r_key */ else - refcount_inc(&mr->r_refcount); + kref_get(&mr->r_kref); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); if (mr) { diff --git a/net/rds/rds.h b/net/rds/rds.h index e4a603523083..3cda01cfaa56 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -291,7 +291,7 @@ struct rds_incoming { struct rds_mr { struct rb_node r_rb_node; - refcount_t r_refcount; + struct kref r_kref; u32 r_key; /* A copy of the creation flags */ @@ -946,12 +946,7 @@ void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); -void __rds_put_mr_final(struct rds_mr *mr); -static inline void rds_mr_put(struct rds_mr *mr) -{ - if (refcount_dec_and_test(&mr->r_refcount)) - __rds_put_mr_final(mr); -} +void __rds_put_mr_final(struct kref *kref); static inline bool rds_destroy_pending(struct rds_connection *conn) { -- cgit From 2fabef4f65b46b261434a27ecdce291b63de8522 Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 8 Apr 2020 03:21:02 -0700 Subject: net/rds: Fix MR reference counting problem In rds_free_mr(), it calls rds_destroy_mr(mr) directly. But this defeats the purpose of reference counting and makes MR free handling impossible. It means that holding a reference does not guarantee that it is safe to access some fields. For example, In rds_cmsg_rdma_dest(), it increases the ref count, unlocks and then calls mr->r_trans->sync_mr(). But if rds_free_mr() (and rds_destroy_mr()) is called in between (there is no lock preventing this to happen), r_trans_private is set to NULL, causing a panic. Similar issue is in rds_rdma_unuse(). Reported-by: zerons Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rdma.c | 25 ++++++++++++------------- net/rds/rds.h | 8 -------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index f828b66978e4..113e442101ce 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -101,9 +101,6 @@ static void rds_destroy_mr(struct rds_mr *mr) rdsdebug("RDS: destroy mr key is %x refcnt %u\n", mr->r_key, kref_read(&mr->r_kref)); - if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) - return; - spin_lock_irqsave(&rs->rs_rdma_lock, flags); if (!RB_EMPTY_NODE(&mr->r_rb_node)) rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); @@ -142,7 +139,6 @@ void rds_rdma_drop_keys(struct rds_sock *rs) rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); RB_CLEAR_NODE(&mr->r_rb_node); spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); - rds_destroy_mr(mr); kref_put(&mr->r_kref, __rds_put_mr_final); spin_lock_irqsave(&rs->rs_rdma_lock, flags); } @@ -436,12 +432,6 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) if (!mr) return -EINVAL; - /* - * call rds_destroy_mr() ourselves so that we're sure it's done by the time - * we return. If we let rds_mr_put() do it it might not happen until - * someone else drops their ref. - */ - rds_destroy_mr(mr); kref_put(&mr->r_kref, __rds_put_mr_final); return 0; } @@ -466,6 +456,14 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) return; } + /* Get a reference so that the MR won't go away before calling + * sync_mr() below. + */ + kref_get(&mr->r_kref); + + /* If it is going to be freed, remove it from the tree now so + * that no other thread can find it and free it. + */ if (mr->r_use_once || force) { rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); RB_CLEAR_NODE(&mr->r_rb_node); @@ -479,12 +477,13 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) if (mr->r_trans->sync_mr) mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); + /* Release the reference held above. */ + kref_put(&mr->r_kref, __rds_put_mr_final); + /* If the MR was marked as invalidate, this will * trigger an async flush. */ - if (zot_me) { - rds_destroy_mr(mr); + if (zot_me) kref_put(&mr->r_kref, __rds_put_mr_final); - } } void rds_rdma_free_op(struct rm_rdma_op *ro) diff --git a/net/rds/rds.h b/net/rds/rds.h index 3cda01cfaa56..8e18cd2aec51 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -299,19 +299,11 @@ struct rds_mr { unsigned int r_invalidate:1; unsigned int r_write:1; - /* This is for RDS_MR_DEAD. - * It would be nice & consistent to make this part of the above - * bit field here, but we need to use test_and_set_bit. - */ - unsigned long r_state; struct rds_sock *r_sock; /* back pointer to the socket that owns us */ struct rds_transport *r_trans; void *r_trans_private; }; -/* Flags for mr->r_state */ -#define RDS_MR_DEAD 0 - static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) { return r_key | (((u64) offset) << 32); -- cgit From 690cc86321eb9bcee371710252742fb16fe96824 Mon Sep 17 00:00:00 2001 From: Taras Chornyi Date: Thu, 9 Apr 2020 20:25:24 +0300 Subject: net: ipv4: devinet: Fix crash when add/del multicast IP with autojoin When CONFIG_IP_MULTICAST is not set and multicast ip is added to the device with autojoin flag or when multicast ip is deleted kernel will crash. steps to reproduce: ip addr add 224.0.0.0/32 dev eth0 ip addr del 224.0.0.0/32 dev eth0 or ip addr add 224.0.0.0/32 dev eth0 autojoin Unable to handle kernel NULL pointer dereference at virtual address 0000000000000088 pc : _raw_write_lock_irqsave+0x1e0/0x2ac lr : lock_sock_nested+0x1c/0x60 Call trace: _raw_write_lock_irqsave+0x1e0/0x2ac lock_sock_nested+0x1c/0x60 ip_mc_config.isra.28+0x50/0xe0 inet_rtm_deladdr+0x1a8/0x1f0 rtnetlink_rcv_msg+0x120/0x350 netlink_rcv_skb+0x58/0x120 rtnetlink_rcv+0x14/0x20 netlink_unicast+0x1b8/0x270 netlink_sendmsg+0x1a0/0x3b0 ____sys_sendmsg+0x248/0x290 ___sys_sendmsg+0x80/0xc0 __sys_sendmsg+0x68/0xc0 __arm64_sys_sendmsg+0x20/0x30 el0_svc_common.constprop.2+0x88/0x150 do_el0_svc+0x20/0x80 el0_sync_handler+0x118/0x190 el0_sync+0x140/0x180 Fixes: 93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on") Signed-off-by: Taras Chornyi Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 30fa42f5997d..c0dd561aa190 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -614,12 +614,15 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) +static int ip_mc_autojoin_config(struct net *net, bool join, + const struct in_ifaddr *ifa) { +#if defined(CONFIG_IP_MULTICAST) struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ifa->ifa_address, .imr_ifindex = ifa->ifa_dev->dev->ifindex, }; + struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; ASSERT_RTNL(); @@ -632,6 +635,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) release_sock(sk); return ret; +#else + return -EOPNOTSUPP; +#endif } static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -675,7 +681,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, continue; if (ipv4_is_multicast(ifa->ifa_address)) - ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); + ip_mc_autojoin_config(net, false, ifa); __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -940,8 +946,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { - int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, - true, ifa); + int ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { inet_free_ifa(ifa); -- cgit From 9b038086f06be1a85d74fe8cc8e74e07db6f422e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Apr 2020 14:21:58 -0700 Subject: docs: networking: convert DIM to RST Convert the Dynamic Interrupt Moderation doc to RST and use the RST features like syntax highlight, function and structure documentation, enumerations, table of contents. Reviewed-by: Randy Dunlap Signed-off-by: Jakub Kicinski --- Documentation/networking/index.rst | 1 + Documentation/networking/net_dim.rst | 170 ++++++++++++++++++++++++++++++++++ Documentation/networking/net_dim.txt | 174 ----------------------------------- MAINTAINERS | 1 + 4 files changed, 172 insertions(+), 174 deletions(-) create mode 100644 Documentation/networking/net_dim.rst delete mode 100644 Documentation/networking/net_dim.txt diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 50133d9761c9..6538ede29661 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -22,6 +22,7 @@ Contents: z8530book msg_zerocopy failover + net_dim net_failover phy sfp-phylink diff --git a/Documentation/networking/net_dim.rst b/Documentation/networking/net_dim.rst new file mode 100644 index 000000000000..1de1e3ec774b --- /dev/null +++ b/Documentation/networking/net_dim.rst @@ -0,0 +1,170 @@ +====================================================== +Net DIM - Generic Network Dynamic Interrupt Moderation +====================================================== + +:Author: Tal Gilboa + +.. contents:: :depth: 2 + +Assumptions +=========== + +This document assumes the reader has basic knowledge in network drivers +and in general interrupt moderation. + + +Introduction +============ + +Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the +interrupt moderation configuration of a channel in order to optimize packet +processing. The mechanism includes an algorithm which decides if and how to +change moderation parameters for a channel, usually by performing an analysis on +runtime data sampled from the system. Net DIM is such a mechanism. In each +iteration of the algorithm, it analyses a given sample of the data, compares it +to the previous sample and if required, it can decide to change some of the +interrupt moderation configuration fields. The data sample is composed of data +bandwidth, the number of packets and the number of events. The time between +samples is also measured. Net DIM compares the current and the previous data and +returns an adjusted interrupt moderation configuration object. In some cases, +the algorithm might decide not to change anything. The configuration fields are +the minimum duration (microseconds) allowed between events and the maximum +number of wanted packets per event. The Net DIM algorithm ascribes importance to +increase bandwidth over reducing interrupt rate. + + +Net DIM Algorithm +================= + +Each iteration of the Net DIM algorithm follows these steps: + +#. Calculates new data sample. +#. Compares it to previous sample. +#. Makes a decision - suggests interrupt moderation configuration fields. +#. Applies a schedule work function, which applies suggested configuration. + +The first two steps are straightforward, both the new and the previous data are +supplied by the driver registered to Net DIM. The previous data is the new data +supplied to the previous iteration. The comparison step checks the difference +between the new and previous data and decides on the result of the last step. +A step would result as "better" if bandwidth increases and as "worse" if +bandwidth reduces. If there is no change in bandwidth, the packet rate is +compared in a similar fashion - increase == "better" and decrease == "worse". +In case there is no change in the packet rate as well, the interrupt rate is +compared. Here the algorithm tries to optimize for lower interrupt rate so an +increase in the interrupt rate is considered "worse" and a decrease is +considered "better". Step #2 has an optimization for avoiding false results: it +only considers a difference between samples as valid if it is greater than a +certain percentage. Also, since Net DIM does not measure anything by itself, it +assumes the data provided by the driver is valid. + +Step #3 decides on the suggested configuration based on the result from step #2 +and the internal state of the algorithm. The states reflect the "direction" of +the algorithm: is it going left (reducing moderation), right (increasing +moderation) or standing still. Another optimization is that if a decision +to stay still is made multiple times, the interval between iterations of the +algorithm would increase in order to reduce calculation overhead. Also, after +"parking" on one of the most left or most right decisions, the algorithm may +decide to verify this decision by taking a step in the other direction. This is +done in order to avoid getting stuck in a "deep sleep" scenario. Once a +decision is made, an interrupt moderation configuration is selected from +the predefined profiles. + +The last step is to notify the registered driver that it should apply the +suggested configuration. This is done by scheduling a work function, defined by +the Net DIM API and provided by the registered driver. + +As you can see, Net DIM itself does not actively interact with the system. It +would have trouble making the correct decisions if the wrong data is supplied to +it and it would be useless if the work function would not apply the suggested +configuration. This does, however, allow the registered driver some room for +manoeuvre as it may provide partial data or ignore the algorithm suggestion +under some conditions. + + +Registering a Network Device to DIM +=================================== + +Net DIM API exposes the main function net_dim(). +This function is the entry point to the Net +DIM algorithm and has to be called every time the driver would like to check if +it should change interrupt moderation parameters. The driver should provide two +data structures: :c:type:`struct dim ` and +:c:type:`struct dim_sample `. :c:type:`struct dim ` +describes the state of DIM for a specific object (RX queue, TX queue, +other queues, etc.). This includes the current selected profile, previous data +samples, the callback function provided by the driver and more. +:c:type:`struct dim_sample ` describes a data sample, +which will be compared to the data sample stored in :c:type:`struct dim ` +in order to decide on the algorithm's next +step. The sample should include bytes, packets and interrupts, measured by +the driver. + +In order to use Net DIM from a networking driver, the driver needs to call the +main net_dim() function. The recommended method is to call net_dim() on each +interrupt. Since Net DIM has a built-in moderation and it might decide to skip +iterations under certain conditions, there is no need to moderate the net_dim() +calls as well. As mentioned above, the driver needs to provide an object of type +:c:type:`struct dim ` to the net_dim() function call. It is advised for +each entity using Net DIM to hold a :c:type:`struct dim ` as part of its +data structure and use it as the main Net DIM API object. +The :c:type:`struct dim_sample ` should hold the latest +bytes, packets and interrupts count. No need to perform any calculations, just +include the raw data. + +The net_dim() call itself does not return anything. Instead Net DIM relies on +the driver to provide a callback function, which is called when the algorithm +decides to make a change in the interrupt moderation parameters. This callback +will be scheduled and run in a separate thread in order not to add overhead to +the data flow. After the work is done, Net DIM algorithm needs to be set to +the proper state in order to move to the next iteration. + + +Example +======= + +The following code demonstrates how to register a driver to Net DIM. The actual +usage is not complete but it should make the outline of the usage clear. + +.. code-block:: c + + #include + + /* Callback for net DIM to schedule on a decision to change moderation */ + void my_driver_do_dim_work(struct work_struct *work) + { + /* Get struct dim from struct work_struct */ + struct dim *dim = container_of(work, struct dim, + work); + /* Do interrupt moderation related stuff */ + ... + + /* Signal net DIM work is done and it should move to next iteration */ + dim->state = DIM_START_MEASURE; + } + + /* My driver's interrupt handler */ + int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...) + { + ... + /* A struct to hold current measured data */ + struct dim_sample dim_sample; + ... + /* Initiate data sample struct with current data */ + dim_update_sample(my_entity->events, + my_entity->packets, + my_entity->bytes, + &dim_sample); + /* Call net DIM */ + net_dim(&my_entity->dim, dim_sample); + ... + } + + /* My entity's initialization function (my_entity was already allocated) */ + int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...) + { + ... + /* Initiate struct work_struct with my driver's callback function */ + INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work); + ... + } diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt deleted file mode 100644 index 9bdb7d5a3ba3..000000000000 --- a/Documentation/networking/net_dim.txt +++ /dev/null @@ -1,174 +0,0 @@ -Net DIM - Generic Network Dynamic Interrupt Moderation -====================================================== - -Author: - Tal Gilboa - - -Contents -========= - -- Assumptions -- Introduction -- The Net DIM Algorithm -- Registering a Network Device to DIM -- Example - -Part 0: Assumptions -====================== - -This document assumes the reader has basic knowledge in network drivers -and in general interrupt moderation. - - -Part I: Introduction -====================== - -Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the -interrupt moderation configuration of a channel in order to optimize packet -processing. The mechanism includes an algorithm which decides if and how to -change moderation parameters for a channel, usually by performing an analysis on -runtime data sampled from the system. Net DIM is such a mechanism. In each -iteration of the algorithm, it analyses a given sample of the data, compares it -to the previous sample and if required, it can decide to change some of the -interrupt moderation configuration fields. The data sample is composed of data -bandwidth, the number of packets and the number of events. The time between -samples is also measured. Net DIM compares the current and the previous data and -returns an adjusted interrupt moderation configuration object. In some cases, -the algorithm might decide not to change anything. The configuration fields are -the minimum duration (microseconds) allowed between events and the maximum -number of wanted packets per event. The Net DIM algorithm ascribes importance to -increase bandwidth over reducing interrupt rate. - - -Part II: The Net DIM Algorithm -=============================== - -Each iteration of the Net DIM algorithm follows these steps: -1. Calculates new data sample. -2. Compares it to previous sample. -3. Makes a decision - suggests interrupt moderation configuration fields. -4. Applies a schedule work function, which applies suggested configuration. - -The first two steps are straightforward, both the new and the previous data are -supplied by the driver registered to Net DIM. The previous data is the new data -supplied to the previous iteration. The comparison step checks the difference -between the new and previous data and decides on the result of the last step. -A step would result as "better" if bandwidth increases and as "worse" if -bandwidth reduces. If there is no change in bandwidth, the packet rate is -compared in a similar fashion - increase == "better" and decrease == "worse". -In case there is no change in the packet rate as well, the interrupt rate is -compared. Here the algorithm tries to optimize for lower interrupt rate so an -increase in the interrupt rate is considered "worse" and a decrease is -considered "better". Step #2 has an optimization for avoiding false results: it -only considers a difference between samples as valid if it is greater than a -certain percentage. Also, since Net DIM does not measure anything by itself, it -assumes the data provided by the driver is valid. - -Step #3 decides on the suggested configuration based on the result from step #2 -and the internal state of the algorithm. The states reflect the "direction" of -the algorithm: is it going left (reducing moderation), right (increasing -moderation) or standing still. Another optimization is that if a decision -to stay still is made multiple times, the interval between iterations of the -algorithm would increase in order to reduce calculation overhead. Also, after -"parking" on one of the most left or most right decisions, the algorithm may -decide to verify this decision by taking a step in the other direction. This is -done in order to avoid getting stuck in a "deep sleep" scenario. Once a -decision is made, an interrupt moderation configuration is selected from -the predefined profiles. - -The last step is to notify the registered driver that it should apply the -suggested configuration. This is done by scheduling a work function, defined by -the Net DIM API and provided by the registered driver. - -As you can see, Net DIM itself does not actively interact with the system. It -would have trouble making the correct decisions if the wrong data is supplied to -it and it would be useless if the work function would not apply the suggested -configuration. This does, however, allow the registered driver some room for -manoeuvre as it may provide partial data or ignore the algorithm suggestion -under some conditions. - - -Part III: Registering a Network Device to DIM -============================================== - -Net DIM API exposes the main function net_dim(struct dim *dim, -struct dim_sample end_sample). This function is the entry point to the Net -DIM algorithm and has to be called every time the driver would like to check if -it should change interrupt moderation parameters. The driver should provide two -data structures: struct dim and struct dim_sample. Struct dim -describes the state of DIM for a specific object (RX queue, TX queue, -other queues, etc.). This includes the current selected profile, previous data -samples, the callback function provided by the driver and more. -Struct dim_sample describes a data sample, which will be compared to the -data sample stored in struct dim in order to decide on the algorithm's next -step. The sample should include bytes, packets and interrupts, measured by -the driver. - -In order to use Net DIM from a networking driver, the driver needs to call the -main net_dim() function. The recommended method is to call net_dim() on each -interrupt. Since Net DIM has a built-in moderation and it might decide to skip -iterations under certain conditions, there is no need to moderate the net_dim() -calls as well. As mentioned above, the driver needs to provide an object of type -struct dim to the net_dim() function call. It is advised for each entity -using Net DIM to hold a struct dim as part of its data structure and use it -as the main Net DIM API object. The struct dim_sample should hold the latest -bytes, packets and interrupts count. No need to perform any calculations, just -include the raw data. - -The net_dim() call itself does not return anything. Instead Net DIM relies on -the driver to provide a callback function, which is called when the algorithm -decides to make a change in the interrupt moderation parameters. This callback -will be scheduled and run in a separate thread in order not to add overhead to -the data flow. After the work is done, Net DIM algorithm needs to be set to -the proper state in order to move to the next iteration. - - -Part IV: Example -================= - -The following code demonstrates how to register a driver to Net DIM. The actual -usage is not complete but it should make the outline of the usage clear. - -my_driver.c: - -#include - -/* Callback for net DIM to schedule on a decision to change moderation */ -void my_driver_do_dim_work(struct work_struct *work) -{ - /* Get struct dim from struct work_struct */ - struct dim *dim = container_of(work, struct dim, - work); - /* Do interrupt moderation related stuff */ - ... - - /* Signal net DIM work is done and it should move to next iteration */ - dim->state = DIM_START_MEASURE; -} - -/* My driver's interrupt handler */ -int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...) -{ - ... - /* A struct to hold current measured data */ - struct dim_sample dim_sample; - ... - /* Initiate data sample struct with current data */ - dim_update_sample(my_entity->events, - my_entity->packets, - my_entity->bytes, - &dim_sample); - /* Call net DIM */ - net_dim(&my_entity->dim, dim_sample); - ... -} - -/* My entity's initialization function (my_entity was already allocated) */ -int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...) -{ - ... - /* Initiate struct work_struct with my driver's callback function */ - INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work); - ... -} diff --git a/MAINTAINERS b/MAINTAINERS index 9271068bde63..46a3a01b54b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5961,6 +5961,7 @@ M: Tal Gilboa S: Maintained F: include/linux/dim.h F: lib/dim/ +F: Documentation/networking/net_dim.rst DZ DECSTATION DZ11 SERIAL DRIVER M: "Maciej W. Rozycki" -- cgit From 9d8592896fd946b27c385d42f5c80b0b5254fce9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 9 Apr 2020 14:21:59 -0700 Subject: docs: networking: add full DIM API Add the full net DIM API to the net_dim.rst file. Signed-off-by: Randy Dunlap Signed-off-by: Jakub Kicinski --- Documentation/networking/net_dim.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/networking/net_dim.rst b/Documentation/networking/net_dim.rst index 1de1e3ec774b..3bed9fd95336 100644 --- a/Documentation/networking/net_dim.rst +++ b/Documentation/networking/net_dim.rst @@ -168,3 +168,9 @@ usage is not complete but it should make the outline of the usage clear. INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work); ... } + +Dynamic Interrupt Moderation (DIM) library API +============================================== + +.. kernel-doc:: include/linux/dim.h + :internal: -- cgit From 3b72f84f8fb65e83e85e9be58eabcf95a40b8f46 Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Sat, 11 Apr 2020 18:51:25 +0200 Subject: net: phy: marvell: Fix pause frame negotiation The negotiation of flow control / pause frame modes was broken since commit fcf1f59afc67 ("net: phy: marvell: rearrange to use genphy_read_lpa()") moved the setting of phydev->duplex below the phy_resolve_aneg_pause call. Due to a check of DUPLEX_FULL in that function, phydev->pause was no longer set. Fix it by moving the parsing of the status variable before the blocks dealing with the pause frames. As the Marvell 88E1510 datasheet does not specify the timing between the link status and the "Speed and Duplex Resolved" bit, we have to force the link down as long as the resolved bit is not set, to avoid reporting link up before we even have valid Speed/Duplex. Tested with a Marvell 88E1510 (RGMII to Copper/1000Base-T) Fixes: fcf1f59afc67 ("net: phy: marvell: rearrange to use genphy_read_lpa()") Signed-off-by: Clemens Gruber Signed-off-by: Jakub Kicinski --- drivers/net/phy/marvell.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 4714ca0e0d4b..7fc8e10c5f33 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1263,6 +1263,30 @@ static int marvell_read_status_page_an(struct phy_device *phydev, int lpa; int err; + if (!(status & MII_M1011_PHY_STATUS_RESOLVED)) { + phydev->link = 0; + return 0; + } + + if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + switch (status & MII_M1011_PHY_STATUS_SPD_MASK) { + case MII_M1011_PHY_STATUS_1000: + phydev->speed = SPEED_1000; + break; + + case MII_M1011_PHY_STATUS_100: + phydev->speed = SPEED_100; + break; + + default: + phydev->speed = SPEED_10; + break; + } + if (!fiber) { err = genphy_read_lpa(phydev); if (err < 0) @@ -1291,28 +1315,6 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } } - if (!(status & MII_M1011_PHY_STATUS_RESOLVED)) - return 0; - - if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - - switch (status & MII_M1011_PHY_STATUS_SPD_MASK) { - case MII_M1011_PHY_STATUS_1000: - phydev->speed = SPEED_1000; - break; - - case MII_M1011_PHY_STATUS_100: - phydev->speed = SPEED_100; - break; - - default: - phydev->speed = SPEED_10; - break; - } - return 0; } -- cgit From 465aa30420bc730ad8f0fe235bc80d169e4b5831 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 10 Apr 2020 20:11:50 +0100 Subject: net: neterion: remove redundant assignment to variable tmp64 The variable tmp64 is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/neterion/s2io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 0ec6b8e8b549..67e62603fe3b 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -5155,7 +5155,7 @@ static int do_s2io_delete_unicast_mc(struct s2io_nic *sp, u64 addr) /* read mac entries from CAM */ static u64 do_s2io_read_unicast_mc(struct s2io_nic *sp, int offset) { - u64 tmp64 = 0xffffffffffff0000ULL, val64; + u64 tmp64, val64; struct XENA_dev_config __iomem *bar0 = sp->bar0; /* read mac addr */ -- cgit From 2ba538989479b3ff34f66728ecfbaf3c5daf0797 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 11 Apr 2020 09:30:04 +0200 Subject: soc: qcom: ipa: Add a missing '\n' in a log message Message logged by 'dev_xxx()' or 'pr_xxx()' should end with a '\n'. Fixes: a646d6ec9098 ("soc: qcom: ipa: modem and microcontroller") Signed-off-by: Christophe JAILLET Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_modem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 55c9329a4b1d..ed10818dd99f 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -297,14 +297,13 @@ static void ipa_modem_crashed(struct ipa *ipa) ret = ipa_endpoint_modem_exception_reset_all(ipa); if (ret) - dev_err(dev, "error %d resetting exception endpoint", - ret); + dev_err(dev, "error %d resetting exception endpoint\n", ret); ipa_endpoint_modem_pause_all(ipa, false); ret = ipa_modem_stop(ipa); if (ret) - dev_err(dev, "error %d stopping modem", ret); + dev_err(dev, "error %d stopping modem\n", ret); /* Now prepare for the next modem boot */ ret = ipa_mem_zero_modem(ipa); -- cgit From e6aaeafd56e33345f1d242cde33dd92614734be8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 11 Apr 2020 09:52:11 +0200 Subject: net: ethernet: ti: Add missing '\n' in log messages Message logged by 'dev_xxx()' or 'pr_xxx()' should end with a '\n'. Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Signed-off-by: Christophe JAILLET Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f71c15c39492..2bf56733ba94 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1372,7 +1372,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) err: i = devm_add_action(dev, am65_cpsw_nuss_free_tx_chns, common); if (i) { - dev_err(dev, "failed to add free_tx_chns action %d", i); + dev_err(dev, "Failed to add free_tx_chns action %d\n", i); return i; } @@ -1481,7 +1481,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) err: i = devm_add_action(dev, am65_cpsw_nuss_free_rx_chns, common); if (i) { - dev_err(dev, "failed to add free_rx_chns action %d", i); + dev_err(dev, "Failed to add free_rx_chns action %d\n", i); return i; } @@ -1691,7 +1691,7 @@ static int am65_cpsw_nuss_init_ndev_2g(struct am65_cpsw_common *common) ret = devm_add_action_or_reset(dev, am65_cpsw_pcpu_stats_free, ndev_priv->stats); if (ret) { - dev_err(dev, "failed to add percpu stat free action %d", ret); + dev_err(dev, "Failed to add percpu stat free action %d\n", ret); return ret; } -- cgit From 3fe260e00cd0bf0be853c48fcc1e19853df615bb Mon Sep 17 00:00:00 2001 From: Gilberto Bertin Date: Fri, 10 Apr 2020 18:20:59 +0200 Subject: net: tun: record RX queue in skb before do_xdp_generic() This allows netif_receive_generic_xdp() to correctly determine the RX queue from which the skb is coming, so that the context passed to the XDP program will contain the correct RX queue index. Signed-off-by: Gilberto Bertin Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 07476c6510f2..44889eba1dbc 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1888,6 +1888,7 @@ drop: skb_reset_network_header(skb); skb_probe_transport_header(skb); + skb_record_rx_queue(skb, tfile->queue_index); if (skb_xdp) { struct bpf_prog *xdp_prog; @@ -2459,6 +2460,7 @@ build: skb->protocol = eth_type_trans(skb, tun->dev); skb_reset_network_header(skb); skb_probe_transport_header(skb); + skb_record_rx_queue(skb, tfile->queue_index); if (skb_xdp) { err = do_xdp_generic(xdp_prog, skb); @@ -2470,7 +2472,6 @@ build: !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); - skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); /* No need for get_cpu_ptr() here since this function is -- cgit From e154659ba39a1c2be576aaa0a5bda8088d707950 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Apr 2020 21:05:01 +0200 Subject: mptcp: fix double-unlock in mptcp_poll mptcp_connect/28740 is trying to release lock (sk_lock-AF_INET) at: [] mptcp_poll+0xb9/0x550 but there are no more locks to release! Call Trace: lock_release+0x50f/0x750 release_sock+0x171/0x1b0 mptcp_poll+0xb9/0x550 sock_poll+0x157/0x470 ? get_net_ns+0xb0/0xb0 do_sys_poll+0x63c/0xdd0 Problem is that __mptcp_tcp_fallback() releases the mptcp socket lock, but after recent change it doesn't do this in all of its return paths. To fix this, remove the unlock from __mptcp_tcp_fallback() and always do the unlock in the caller. Also add a small comment as to why we have this __mptcp_needs_tcp_fallback(). Fixes: 0b4f33def7bbde ("mptcp: fix tcp fallback crash") Reported-by: syzbot+e56606435b7bfeea8cf5@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 939a5045181a..9936e33ac351 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -97,12 +97,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) if (likely(!__mptcp_needs_tcp_fallback(msk))) return NULL; - if (msk->subflow) { - release_sock((struct sock *)msk); - return msk->subflow; - } - - return NULL; + return msk->subflow; } static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) @@ -734,9 +729,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } +fallback: ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { -fallback: + release_sock(sk); pr_debug("fallback passthrough"); ret = sock_sendmsg(ssock, msg); return ret >= 0 ? ret + copied : (copied ? copied : ret); @@ -769,8 +765,14 @@ fallback: if (ret < 0) break; if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { + /* Can happen for passive sockets: + * 3WHS negotiated MPTCP, but first packet after is + * plain TCP (e.g. due to middlebox filtering unknown + * options). + * + * Fall back to TCP. + */ release_sock(ssk); - ssock = __mptcp_tcp_fallback(msk); goto fallback; } @@ -883,6 +885,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ssock = __mptcp_tcp_fallback(msk); if (unlikely(ssock)) { fallback: + release_sock(sk); pr_debug("fallback-read subflow=%p", mptcp_subflow_ctx(ssock->sk)); copied = sock_recvmsg(ssock, msg, flags); @@ -1467,12 +1470,11 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_tcp_fallback(msk); + release_sock(sk); if (ssock) return tcp_setsockopt(ssock->sk, level, optname, optval, optlen); - release_sock(sk); - return -EOPNOTSUPP; } @@ -1492,12 +1494,11 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_tcp_fallback(msk); + release_sock(sk); if (ssock) return tcp_getsockopt(ssock->sk, level, optname, optval, option); - release_sock(sk); - return -EOPNOTSUPP; } -- cgit From 664d035c4707ac8643c2846d1a1d4cdf9ce89b90 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 12 Apr 2020 23:20:34 +0200 Subject: net: mvneta: Fix a typo s/mvmeta/mvneta/ Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5be61f73b6ab..51889770958d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5383,7 +5383,7 @@ static int __init mvneta_driver_init(void) { int ret; - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvmeta:online", + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online", mvneta_cpu_online, mvneta_cpu_down_prepare); if (ret < 0) -- cgit From 0e012b4e4b5ec8e064be3502382579dd0bb43269 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 12 Apr 2020 00:40:30 +0200 Subject: nl80211: fix NL80211_ATTR_FTM_RESPONDER policy The nested policy here should be established using the NLA_POLICY_NESTED() macro so the length is properly filled in. Cc: stable@vger.kernel.org Fixes: 81e54d08d9d8 ("cfg80211: support FTM responder configuration/statistics") Link: https://lore.kernel.org/r/20200412004029.9d0722bb56c8.Ie690bfcc4a1a61ff8d8ca7e475d59fcaa52fb2da@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5fa402144cda..692bcd35f809 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -644,10 +644,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HE_MAX_CAPABILITY_LEN }, - [NL80211_ATTR_FTM_RESPONDER] = { - .type = NLA_NESTED, - .validation_data = nl80211_ftm_responder_policy, - }, + [NL80211_ATTR_FTM_RESPONDER] = + NLA_POLICY_NESTED(nl80211_ftm_responder_policy), [NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PEER_MEASUREMENTS] = NLA_POLICY_NESTED(nl80211_pmsr_attr_policy), -- cgit From 7ea862048317aa76d0f22334202779a25530980c Mon Sep 17 00:00:00 2001 From: Tuomas Tynkkynen Date: Fri, 10 Apr 2020 15:32:57 +0300 Subject: mac80211_hwsim: Use kstrndup() in place of kasprintf() syzbot reports a warning: precision 33020 too large WARNING: CPU: 0 PID: 9618 at lib/vsprintf.c:2471 set_precision+0x150/0x180 lib/vsprintf.c:2471 vsnprintf+0xa7b/0x19a0 lib/vsprintf.c:2547 kvasprintf+0xb2/0x170 lib/kasprintf.c:22 kasprintf+0xbb/0xf0 lib/kasprintf.c:59 hwsim_del_radio_nl+0x63a/0x7e0 drivers/net/wireless/mac80211_hwsim.c:3625 genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] ... entry_SYSCALL_64_after_hwframe+0x49/0xbe Thus it seems that kasprintf() with "%.*s" format can not be used for duplicating a string with arbitrary length. Replace it with kstrndup(). Note that later this string is limited to NL80211_WIPHY_NAME_MAXLEN == 64, but the code is simpler this way. Reported-by: syzbot+6693adf1698864d21734@syzkaller.appspotmail.com Reported-by: syzbot+a4aee3f42d7584d76761@syzkaller.appspotmail.com Cc: stable@kernel.org Signed-off-by: Tuomas Tynkkynen Link: https://lore.kernel.org/r/20200410123257.14559-1-tuomas.tynkkynen@iki.fi [johannes: add note about length limit] Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 7fe8207db6ae..7c4b7c31d07a 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3669,9 +3669,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) } if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { - hwname = kasprintf(GFP_KERNEL, "%.*s", - nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), - (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + hwname = kstrndup((char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]), + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + GFP_KERNEL); if (!hwname) return -ENOMEM; param.hwname = hwname; @@ -3691,9 +3691,9 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_RADIO_ID]) { idx = nla_get_u32(info->attrs[HWSIM_ATTR_RADIO_ID]); } else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { - hwname = kasprintf(GFP_KERNEL, "%.*s", - nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), - (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + hwname = kstrndup((char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]), + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + GFP_KERNEL); if (!hwname) return -ENOMEM; } else -- cgit From a710d21451ff2917b9004b65ba2f0db6380671d5 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Wed, 8 Apr 2020 23:10:13 +0000 Subject: cfg80211: fix kernel-doc notation Update missing kernel-doc annotations and fix of related warnings at 'make htmldocs'. Signed-off-by: Lothar Rubusch Link: https://lore.kernel.org/r/20200408231013.28370-1-l.rubusch@gmail.com [fix indentation, attribute references] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c78bd4ff9e33..70e48f66dac8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -905,6 +905,8 @@ struct survey_info { * protocol frames. * @control_port_over_nl80211: TRUE if userspace expects to exchange control * port frames over NL80211 instead of the network interface. + * @control_port_no_preauth: disables pre-auth rx over the nl80211 control + * port for mac80211 * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key @@ -1222,6 +1224,7 @@ struct sta_txpwr { * @he_capa: HE capabilities of station * @he_capa_len: the length of the HE capabilities * @airtime_weight: airtime scheduler weight for this station + * @txpwr: transmit power for an associated station */ struct station_parameters { const u8 *supported_rates; @@ -4666,6 +4669,9 @@ struct wiphy_iftype_akm_suites { * @txq_memory_limit: configuration internal TX queue memory limit * @txq_quantum: configuration of internal TX queue scheduler quantum * + * @tx_queue_len: allow setting transmit queue len for drivers not using + * wake_tx_queue + * * @support_mbssid: can HW support association with nontransmitted AP * @support_only_he_mbssid: don't parse MBSSID elements if it is not * HE AP, in order to avoid compatibility issues. @@ -4681,6 +4687,10 @@ struct wiphy_iftype_akm_suites { * supported by the driver for each peer * @tid_config_support.max_retry: maximum supported retry count for * long/short retry configuration + * + * @max_data_retry_count: maximum supported per TID retry count for + * configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and + * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes */ struct wiphy { /* assign these fields before you register the wiphy */ -- cgit From 7dc7c41607d192ff660ba4ea82d517745c1d7523 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Apr 2020 20:53:51 +0200 Subject: rtw88: avoid unused function warnings The rtw88 driver defines emtpy functions with multiple indirections but gets one of these wrong: drivers/net/wireless/realtek/rtw88/pci.c:1347:12: error: 'rtw_pci_resume' defined but not used [-Werror=unused-function] 1347 | static int rtw_pci_resume(struct device *dev) | ^~~~~~~~~~~~~~ drivers/net/wireless/realtek/rtw88/pci.c:1342:12: error: 'rtw_pci_suspend' defined but not used [-Werror=unused-function] 1342 | static int rtw_pci_suspend(struct device *dev) Better simplify it to rely on the conditional reference in SIMPLE_DEV_PM_OPS(), and mark the functions as __maybe_unused to avoid warning about it. I'm not sure if these are needed at all given that the functions don't do anything, but they were only recently added. Fixes: 44bc17f7f5b3 ("rtw88: support wowlan feature for 8822c") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200408185413.218643-1-arnd@arndb.de --- drivers/net/wireless/realtek/rtw88/pci.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c index e37c71495c0d..1af87eb2e53a 100644 --- a/drivers/net/wireless/realtek/rtw88/pci.c +++ b/drivers/net/wireless/realtek/rtw88/pci.c @@ -1338,22 +1338,17 @@ static void rtw_pci_phy_cfg(struct rtw_dev *rtwdev) rtw_pci_link_cfg(rtwdev); } -#ifdef CONFIG_PM -static int rtw_pci_suspend(struct device *dev) +static int __maybe_unused rtw_pci_suspend(struct device *dev) { return 0; } -static int rtw_pci_resume(struct device *dev) +static int __maybe_unused rtw_pci_resume(struct device *dev) { return 0; } static SIMPLE_DEV_PM_OPS(rtw_pm_ops, rtw_pci_suspend, rtw_pci_resume); -#define RTW_PM_OPS (&rtw_pm_ops) -#else -#define RTW_PM_OPS NULL -#endif static int rtw_pci_claim(struct rtw_dev *rtwdev, struct pci_dev *pdev) { @@ -1582,7 +1577,7 @@ static struct pci_driver rtw_pci_driver = { .id_table = rtw_pci_id_table, .probe = rtw_pci_probe, .remove = rtw_pci_remove, - .driver.pm = RTW_PM_OPS, + .driver.pm = &rtw_pm_ops, }; module_pci_driver(rtw_pci_driver); -- cgit From 4178417cc5359c329790a4a8f4a6604612338cca Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Thu, 9 Apr 2020 15:17:52 -0700 Subject: arm, bpf: Fix offset overflow for BPF_MEM BPF_DW This patch fixes an incorrect check in how immediate memory offsets are computed for BPF_DW on arm. For BPF_LDX/ST/STX + BPF_DW, the 32-bit arm JIT breaks down an 8-byte access into two separate 4-byte accesses using off+0 and off+4. If off fits in imm12, the JIT emits a ldr/str instruction with the immediate and avoids the use of a temporary register. While the current check off <= 0xfff ensures that the first immediate off+0 doesn't overflow imm12, it's not sufficient for the second immediate off+4, which may cause the second access of BPF_DW to read/write the wrong address. This patch fixes the problem by changing the check to off <= 0xfff - 4 for BPF_DW, ensuring off+4 will never overflow. A side effect of simplifying the check is that it now allows using negative immediate offsets in ldr/str. This means that small negative offsets can also avoid the use of a temporary register. This patch introduces no new failures in test_verifier or test_bpf.c. Fixes: c5eae692571d6 ("ARM: net: bpf: improve 64-bit store implementation") Fixes: ec19e02b343db ("ARM: net: bpf: fix LDX instructions") Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200409221752.28448-1-luke.r.nels@gmail.com --- arch/arm/net/bpf_jit_32.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index d124f78e20ac..bf85d6db4931 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1000,21 +1000,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], arm_bpf_put_reg32(dst_hi, rd[0], ctx); } +static bool is_ldst_imm(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + case BPF_DW: + /* Need to make sure off+4 does not overflow. */ + off_max = 0xfff - 4; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s32 off_max; s8 rd; rd = arm_bpf_get_reg32(dst, tmp[1], ctx); - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; @@ -1043,18 +1057,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[], /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const s8 dst[], const s8 src, - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; - s32 off_max; - - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; -- cgit From 1f6cb19be2e231fe092f40decb71f066eba090d7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 10 Apr 2020 13:26:12 -0700 Subject: bpf: Prevent re-mmap()'ing BPF map as writable for initially r/o mapping VM_MAYWRITE flag during initial memory mapping determines if already mmap()'ed pages can be later remapped as writable ones through mprotect() call. To prevent user application to rewrite contents of memory-mapped as read-only and subsequently frozen BPF map, remove VM_MAYWRITE flag completely on initially read-only mapping. Alternatively, we could treat any memory-mapping on unfrozen map as writable and bump writecnt instead. But there is little legitimate reason to map BPF map as read-only and then re-mmap() it as writable through mprotect(), instead of just mmap()'ing it as read/write from the very beginning. Also, at the suggestion of Jann Horn, drop unnecessary refcounting in mmap operations. We can just rely on VMA holding reference to BPF map's file properly. Fixes: fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY") Reported-by: Jann Horn Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Jann Horn Link: https://lore.kernel.org/bpf/20200410202613.3679837-1-andriin@fb.com --- kernel/bpf/syscall.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64783da34202..d85f37239540 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -586,9 +586,7 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - bpf_map_inc_with_uref(map); - - if (vma->vm_flags & VM_WRITE) { + if (vma->vm_flags & VM_MAYWRITE) { mutex_lock(&map->freeze_mutex); map->writecnt++; mutex_unlock(&map->freeze_mutex); @@ -600,13 +598,11 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_WRITE) { + if (vma->vm_flags & VM_MAYWRITE) { mutex_lock(&map->freeze_mutex); map->writecnt--; mutex_unlock(&map->freeze_mutex); } - - bpf_map_put_with_uref(map); } static const struct vm_operations_struct bpf_map_default_vmops = { @@ -635,14 +631,16 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) /* set default open/close callbacks */ vma->vm_ops = &bpf_map_default_vmops; vma->vm_private_data = map; + vma->vm_flags &= ~VM_MAYEXEC; + if (!(vma->vm_flags & VM_WRITE)) + /* disallow re-mapping with PROT_WRITE */ + vma->vm_flags &= ~VM_MAYWRITE; err = map->ops->map_mmap(map, vma); if (err) goto out; - bpf_map_inc_with_uref(map); - - if (vma->vm_flags & VM_WRITE) + if (vma->vm_flags & VM_MAYWRITE) map->writecnt++; out: mutex_unlock(&map->freeze_mutex); -- cgit From 642c1654702731ab42a3be771bebbd6ef938f0dc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 10 Apr 2020 13:26:13 -0700 Subject: selftests/bpf: Validate frozen map contents stays frozen Test that frozen and mmap()'ed BPF map can't be mprotect()'ed as writable or executable memory. Also validate that "downgrading" from writable to read-only doesn't screw up internal writable count accounting for the purposes of map freezing. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200410202613.3679837-2-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/mmap.c | 62 ++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 16a814eb4d64..56d80adcf4bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -19,15 +19,16 @@ void test_mmap(void) const size_t map_sz = roundup_page(sizeof(struct map_data)); const int zero = 0, one = 1, two = 2, far = 1500; const long page_size = sysconf(_SC_PAGE_SIZE); - int err, duration = 0, i, data_map_fd; + int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd; struct bpf_map *data_map, *bss_map; void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; struct test_mmap__bss *bss_data; + struct bpf_map_info map_info; + __u32 map_info_sz = sizeof(map_info); struct map_data *map_data; struct test_mmap *skel; __u64 val = 0; - skel = test_mmap__open_and_load(); if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) return; @@ -36,6 +37,14 @@ void test_mmap(void) data_map = skel->maps.data_map; data_map_fd = bpf_map__fd(data_map); + /* get map's ID */ + memset(&map_info, 0, map_info_sz); + err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz); + if (CHECK(err, "map_get_info", "failed %d\n", errno)) + goto cleanup; + data_map_id = map_info.id; + + /* mmap BSS map */ bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED, bpf_map__fd(bss_map), 0); if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap", @@ -98,6 +107,10 @@ void test_mmap(void) "data_map freeze succeeded: err=%d, errno=%d\n", err, errno)) goto cleanup; + err = mprotect(map_mmaped, map_sz, PROT_READ); + if (CHECK(err, "mprotect_ro", "mprotect to r/o failed %d\n", errno)) + goto cleanup; + /* unmap R/W mapping */ err = munmap(map_mmaped, map_sz); map_mmaped = NULL; @@ -111,6 +124,12 @@ void test_mmap(void) map_mmaped = NULL; goto cleanup; } + err = mprotect(map_mmaped, map_sz, PROT_WRITE); + if (CHECK(!err, "mprotect_wr", "mprotect() succeeded unexpectedly!\n")) + goto cleanup; + err = mprotect(map_mmaped, map_sz, PROT_EXEC); + if (CHECK(!err, "mprotect_ex", "mprotect() succeeded unexpectedly!\n")) + goto cleanup; map_data = map_mmaped; /* map/unmap in a loop to test ref counting */ @@ -197,6 +216,45 @@ void test_mmap(void) CHECK_FAIL(map_data->val[far] != 3 * 321); munmap(tmp2, 4 * page_size); + + tmp1 = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0); + if (CHECK(tmp1 == MAP_FAILED, "last_mmap", "failed %d\n", errno)) + goto cleanup; + + test_mmap__destroy(skel); + skel = NULL; + CHECK_FAIL(munmap(bss_mmaped, bss_sz)); + bss_mmaped = NULL; + CHECK_FAIL(munmap(map_mmaped, map_sz)); + map_mmaped = NULL; + + /* map should be still held by active mmap */ + tmp_fd = bpf_map_get_fd_by_id(data_map_id); + if (CHECK(tmp_fd < 0, "get_map_by_id", "failed %d\n", errno)) { + munmap(tmp1, map_sz); + goto cleanup; + } + close(tmp_fd); + + /* this should release data map finally */ + munmap(tmp1, map_sz); + + /* we need to wait for RCU grace period */ + for (i = 0; i < 10000; i++) { + __u32 id = data_map_id - 1; + if (bpf_map_get_next_id(id, &id) || id > data_map_id) + break; + usleep(1); + } + + /* should fail to get map FD by non-existing ID */ + tmp_fd = bpf_map_get_fd_by_id(data_map_id); + if (CHECK(tmp_fd >= 0, "get_map_by_id_after", + "unexpectedly succeeded %d\n", tmp_fd)) { + close(tmp_fd); + goto cleanup; + } + cleanup: if (bss_mmaped) CHECK_FAIL(munmap(bss_mmaped, bss_sz)); -- cgit From 96b2eb6e77959b4b52f80e7a61d03db77606aac6 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Fri, 10 Apr 2020 11:06:12 +0900 Subject: tools, bpftool: Fix struct_ops command invalid pointer free In commit 65c93628599d ("bpftool: Add struct_ops support") a new type of command named struct_ops has been added. This command requires a kernel with CONFIG_DEBUG_INFO_BTF=y set and for retrieving BTF info in bpftool, the helper get_btf_vmlinux() is used. When running this command on kernel without BTF debug info, this will lead to 'btf_vmlinux' variable being an invalid(error) pointer. And by this, btf_free() causes a segfault when executing 'bpftool struct_ops'. This commit adds pointer validation with IS_ERR not to free invalid pointer, and this will fix the segfault issue. Fixes: 65c93628599d ("bpftool: Add struct_ops support") Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200410020612.2930667-1-danieltimlee@gmail.com --- tools/bpf/bpftool/struct_ops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 2a7befbd11ad..0fe0d584c57e 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -591,6 +591,8 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - btf__free(btf_vmlinux); + if (!IS_ERR(btf_vmlinux)) + btf__free(btf_vmlinux); + return err; } -- cgit From dfa74909cb6b846cbdabfc2c3c7de1d507fca075 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 12 Apr 2020 07:32:04 -0600 Subject: xdp: Reset prog in dev_change_xdp_fd when fd is negative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit mentioned in the Fixes tag reuses the local prog variable when looking up an expected_fd. The variable is not reset when fd < 0 causing a detach with the expected_fd set to actually call dev_xdp_install for the existing program. The end result is that the detach does not happen. Fixes: 92234c8f15c8 ("xdp: Support specifying expected existing program when attaching XDP") Signed-off-by: David Ahern Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200412133204.43847-1-dsahern@kernel.org --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index df8097b8e286..522288177bbd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8667,8 +8667,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, const struct net_device_ops *ops = dev->netdev_ops; enum bpf_netdev_command query; u32 prog_id, expected_id = 0; - struct bpf_prog *prog = NULL; bpf_op_t bpf_op, bpf_chk; + struct bpf_prog *prog; bool offload; int err; @@ -8734,6 +8734,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, } else { if (!prog_id) return 0; + prog = NULL; } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); -- cgit From 89f33dcadb349eb926a92633e2c5f61466afc596 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Mon, 13 Apr 2020 19:57:56 +0800 Subject: bpf: remove unneeded conversion to bool in __mark_reg_unknown This issue was detected by using the Coccinelle software: kernel/bpf/verifier.c:1259:16-21: WARNING: conversion to bool not needed here The conversion to bool is unneeded, remove it. Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/1586779076-101346-1-git-send-email-zou_wei@huawei.com --- kernel/bpf/verifier.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 04c6630cc18f..38cfcf701eeb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1255,8 +1255,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, reg->type = SCALAR_VALUE; reg->var_off = tnum_unknown; reg->frameno = 0; - reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? - true : false; + reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks; __mark_reg_unbounded(reg); } -- cgit From 68dac3eb50be32957ae6e1e6da9281a3b7c6658b Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 10 Apr 2020 12:16:16 +0900 Subject: net: phy: micrel: use genphy_read_status for KSZ9131 KSZ9131 will not work with some switches due to workaround for KSZ9031 introduced in commit d2fd719bcb0e83cb39cfee22ee800f98a56eceb3 ("net/phy: micrel: Add workaround for bad autoneg"). Use genphy_read_status instead of dedicated ksz9031_read_status. Fixes: bff5b4b37372 ("net: phy: micrel: add Microchip KSZ9131 initial driver") Signed-off-by: Atsushi Nemoto Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 05d20343b816..3a4d83fa52dc 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1204,7 +1204,7 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9131_config_init, - .read_status = ksz9031_read_status, + .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .get_sset_count = kszphy_get_sset_count, -- cgit From 0e631eee17dcea576ab922fa70e4fdbd596ee452 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 13 Apr 2020 13:57:14 +0100 Subject: rxrpc: Fix DATA Tx to disable nofrag for UDP on AF_INET6 socket Fix the DATA packet transmission to disable nofrag for UDPv4 on an AF_INET6 socket as well as UDPv6 when trying to transmit fragmentably. Without this, packets filled to the normal size used by the kernel AFS client of 1412 bytes be rejected by udp_sendmsg() with EMSGSIZE immediately. The ->sk_error_report() notification hook is called, but rxrpc doesn't generate a trace for it. This is a temporary fix; a more permanent solution needs to involve changing the size of the packets being filled in accordance with the MTU, which isn't currently done in AF_RXRPC. The reason for not doing so was that, barring the last packet in an rx jumbo packet, jumbos can only be assembled out of 1412-byte packets - and the plan was to construct jumbos on the fly at transmission time. Also, there's no point turning on IPV6_MTU_DISCOVER, since IPv6 has to engage in this anyway since fragmentation is only done by the sender. We can then condense the switch-statement in rxrpc_send_data_packet(). Fixes: 75b54cb57ca3 ("rxrpc: Add IPv6 support") Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/local_object.c | 9 --------- net/rxrpc/output.c | 44 ++++++++++++-------------------------------- 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a6c1349e965d..01135e54d95d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -165,15 +165,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) goto error; } - /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } - /* Fall through and set IPv4 options too otherwise we don't get * errors from IPv4 packets sent through the IPv6 socket. */ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index bad3d2420344..90e263c6aa69 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -474,41 +474,21 @@ send_fragmentable: skb->tstamp = ktime_get_real(); switch (conn->params.local->srx.transport.family) { + case AF_INET6: case AF_INET: opt = IP_PMTUDISC_DONT; - ret = kernel_setsockopt(conn->params.local->socket, - SOL_IP, IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); - - opt = IP_PMTUDISC_DO; - kernel_setsockopt(conn->params.local->socket, SOL_IP, - IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - } - break; - -#ifdef CONFIG_AF_RXRPC_IPV6 - case AF_INET6: - opt = IPV6_PMTUDISC_DONT; - ret = kernel_setsockopt(conn->params.local->socket, - SOL_IPV6, IPV6_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); - - opt = IPV6_PMTUDISC_DO; - kernel_setsockopt(conn->params.local->socket, - SOL_IPV6, IPV6_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); - } + kernel_setsockopt(conn->params.local->socket, + SOL_IP, IP_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); + conn->params.peer->last_tx_at = ktime_get_seconds(); + + opt = IP_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, + SOL_IP, IP_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); break; -#endif default: BUG(); -- cgit From 555cd19d0c6a23b3faef949bccca4822cccc2eb7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 13 Apr 2020 10:33:10 -0700 Subject: ionic: add dynamic_debug header Add the appropriate header for using dynamic_hex_dump(), which seems to be incidentally included in some configurations but not all. Fixes: 7e4d47596b68 ("ionic: replay filters after fw upgrade") Reported-by: Randy Dunlap Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index f3c7dd1596ee..27b7eca19784 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -2,6 +2,7 @@ /* Copyright(c) 2017 - 2019 Pensando Systems, Inc */ #include +#include #include #include "ionic.h" -- cgit From 2c0df9f9eddbc87fa2ef8da86264995404d816b9 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 13 Apr 2020 10:33:11 -0700 Subject: ionic: fix unused assignment Remove an unused initialized value. Fixes: 7e4d47596b68 ("ionic: replay filters after fw upgrade") Reported-by: kbuild test robot Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index 27b7eca19784..80eeb7696e01 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -26,7 +26,7 @@ void ionic_rx_filter_replay(struct ionic_lif *lif) struct hlist_head *head; struct hlist_node *tmp; unsigned int i; - int err = 0; + int err; ac = &ctx.cmd.rx_filter_add; -- cgit From 34b5e6a33c1a8e466c3a73fd437f66fb16cb83ea Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 14 Apr 2020 02:34:38 +0200 Subject: net: dsa: mv88e6xxx: Configure MAC when using fixed link The 88e6185 is reporting it has detected a PHY, when a port is connected to an SFP. As a result, the fixed-phy configuration is not being applied. That then breaks packet transfer, since the port is reported as being down. Add additional conditions to check the interface mode, and if it is fixed always configure the port on link up/down, independent of the PPU status. Fixes: 30c4a5b0aad8 ("net: mv88e6xxx: use resolved link config in mac_link_up()") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 221593261e8f..dd8a5666a584 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -709,7 +709,8 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - if (!mv88e6xxx_port_ppu_updates(chip, port) && ops->port_set_link) + if ((!mv88e6xxx_port_ppu_updates(chip, port) || + mode == MLO_AN_FIXED) && ops->port_set_link) err = ops->port_set_link(chip, port, LINK_FORCED_DOWN); mv88e6xxx_reg_unlock(chip); @@ -731,7 +732,7 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - if (!mv88e6xxx_port_ppu_updates(chip, port)) { + if (!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) { /* FIXME: for an automedia port, should we force the link * down here - what if the link comes up due to "other" media * while we're bringing the port up, how is the exclusivity -- cgit From 3be98b2d5fbca3da7c4df0477eed95bfb5b83d64 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 14 Apr 2020 02:34:39 +0200 Subject: net: dsa: Down cpu/dsa ports phylink will control DSA and CPU ports can be configured in two ways. By default, the driver should configure such ports to there maximum bandwidth. For most use cases, this is sufficient. When this default is insufficient, a phylink instance can be bound to such ports, and phylink will configure the port, e.g. based on fixed-link properties. phylink assumes the port is initially down. Given that the driver should have already configured it to its maximum speed, ask the driver to down the port before instantiating the phylink instance. Fixes: 30c4a5b0aad8 ("net: mv88e6xxx: use resolved link config in mac_link_up()") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/port.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/dsa/port.c b/net/dsa/port.c index 231b2d494f1c..a58fdd362574 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -670,11 +670,16 @@ int dsa_port_link_register_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; struct device_node *phy_np; + int port = dp->index; if (!ds->ops->adjust_link) { phy_np = of_parse_phandle(dp->dn, "phy-handle", 0); - if (of_phy_is_fixed_link(dp->dn) || phy_np) + if (of_phy_is_fixed_link(dp->dn) || phy_np) { + if (ds->ops->phylink_mac_link_down) + ds->ops->phylink_mac_link_down(ds, port, + MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); return dsa_port_phylink_register(dp); + } return 0; } -- cgit From a7a0d6269652846671312b29992143f56e2866b8 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Tue, 14 Apr 2020 10:12:34 +0900 Subject: net: stmmac: socfpga: Allow all RGMII modes Allow all the RGMII modes to be used. (Not only "rgmii", "rgmii-id" but "rgmii-txid", "rgmii-rxid") Signed-off-by: Atsushi Nemoto Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index e0212d2fc2a1..fa32cd5b418e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -241,6 +241,8 @@ static int socfpga_set_phy_mode_common(int phymode, u32 *val) switch (phymode) { case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII; break; case PHY_INTERFACE_MODE_MII: -- cgit From c799fca8baf18d1bbbbad6c3b736eefbde8bdb90 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Apr 2020 12:27:08 -0300 Subject: net/cxgb4: Check the return from t4_query_params properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Positive return values are also failures that don't set val, although this probably can't happen. Fixes gcc 10 warning: drivers/net/ethernet/chelsio/cxgb4/t4_hw.c: In function ‘t4_phy_fw_ver’: drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:3747:14: warning: ‘val’ may be used uninitialized in this function [-Wmaybe-uninitialized] 3747 | *phy_fw_ver = val; Fixes: 01b6961410b7 ("cxgb4: Add PHY firmware support for T420-BT cards") Signed-off-by: Jason Gunthorpe Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 239f678a94ed..2a3480fc1d91 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3742,7 +3742,7 @@ int t4_phy_fw_ver(struct adapter *adap, int *phy_fw_ver) FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_PHYFW_VERSION)); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); - if (ret < 0) + if (ret) return ret; *phy_fw_ver = val; return 0; -- cgit From dd649b4ff0127559950965d739cc63efae50ecd9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 14 Apr 2020 20:49:03 +0100 Subject: net: marvell10g: report firmware version Report the firmware version when probing the PHY to allow issues attributable to firmware to be diagnosed. Tested-by: Matteo Croce Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 7621badae64d..748532d9e1ae 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -33,6 +33,8 @@ #define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa) enum { + MV_PMA_FW_VER0 = 0xc011, + MV_PMA_FW_VER1 = 0xc012, MV_PMA_BOOT = 0xc050, MV_PMA_BOOT_FATAL = BIT(0), @@ -83,6 +85,8 @@ enum { }; struct mv3310_priv { + u32 firmware_ver; + struct device *hwmon_dev; char *hwmon_name; }; @@ -355,6 +359,22 @@ static int mv3310_probe(struct phy_device *phydev) dev_set_drvdata(&phydev->mdio.dev, priv); + ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MV_PMA_FW_VER0); + if (ret < 0) + return ret; + + priv->firmware_ver = ret << 16; + + ret = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MV_PMA_FW_VER1); + if (ret < 0) + return ret; + + priv->firmware_ver |= ret; + + phydev_info(phydev, "Firmware version %u.%u.%u.%u\n", + priv->firmware_ver >> 24, (priv->firmware_ver >> 16) & 255, + (priv->firmware_ver >> 8) & 255, priv->firmware_ver & 255); + /* Powering down the port when not in use saves about 600mW */ ret = mv3310_power_down(phydev); if (ret) -- cgit From 8f48c2ac85eda8d8a01c83c6d73f891c43ef182d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 14 Apr 2020 20:49:08 +0100 Subject: net: marvell10g: soft-reset the PHY when coming out of low power Soft-reset the PHY when coming out of low power mode, which seems to be necessary with firmware versions 0.3.3.0 and 0.3.10.0. This depends on ("net: marvell10g: report firmware version") Fixes: c9cc1c815d36 ("net: phy: marvell10g: place in powersave mode at probe") Reported-by: Matteo Croce Tested-by: Matteo Croce Reviewed-by: Andrew Lunn Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 748532d9e1ae..95e3f4644aeb 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -75,7 +75,8 @@ enum { /* Vendor2 MMD registers */ MV_V2_PORT_CTRL = 0xf001, - MV_V2_PORT_CTRL_PWRDOWN = 0x0800, + MV_V2_PORT_CTRL_SWRST = BIT(15), + MV_V2_PORT_CTRL_PWRDOWN = BIT(11), MV_V2_TEMP_CTRL = 0xf08a, MV_V2_TEMP_CTRL_MASK = 0xc000, MV_V2_TEMP_CTRL_SAMPLE = 0x0000, @@ -239,8 +240,17 @@ static int mv3310_power_down(struct phy_device *phydev) static int mv3310_power_up(struct phy_device *phydev) { - return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL, - MV_V2_PORT_CTRL_PWRDOWN); + struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev); + int ret; + + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL, + MV_V2_PORT_CTRL_PWRDOWN); + + if (priv->firmware_ver < 0x00030000) + return ret; + + return phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL, + MV_V2_PORT_CTRL_SWRST); } static int mv3310_reset(struct phy_device *phydev, u32 unit) -- cgit From 52e04b4ce5d03775b6a78f3ed1097480faacc9fd Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 7 Apr 2020 15:40:55 +0530 Subject: mac80211: fix race in ieee80211_register_hw() A race condition leading to a kernel crash is observed during invocation of ieee80211_register_hw() on a dragonboard410c device having wcn36xx driver built as a loadable module along with a wifi manager in user-space waiting for a wifi device (wlanX) to be active. Sequence diagram for a particular kernel crash scenario: user-space ieee80211_register_hw() ieee80211_tasklet_handler() ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | | | |<---phy0----wiphy_register() | |-----iwd if_add---->| | | |<---IRQ----(RX packet) | Kernel crash | | due to unallocated | | workqueue. | | | | | alloc_ordered_workqueue() | | | | | Misc wiphy init. | | | | | ieee80211_if_add() | | | | As evident from above sequence diagram, this race condition isn't specific to a particular wifi driver but rather the initialization sequence in ieee80211_register_hw() needs to be fixed. So re-order the initialization sequence and the updated sequence diagram would look like: user-space ieee80211_register_hw() ieee80211_tasklet_handler() ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | | | | alloc_ordered_workqueue() | | | | | Misc wiphy init. | | | | |<---phy0----wiphy_register() | |-----iwd if_add---->| | | |<---IRQ----(RX packet) | | | | ieee80211_if_add() | | | | Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Link: https://lore.kernel.org/r/1586254255-28713-1-git-send-email-sumit.garg@linaro.org [Johannes: fix rtnl imbalances] Signed-off-by: Johannes Berg --- net/mac80211/main.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8345926193de..0e9ad60fb2b3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1069,7 +1069,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; if (hw->max_signal <= 0) { result = -EINVAL; - goto fail_wiphy_register; + goto fail_workqueue; } } @@ -1135,7 +1135,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) result = ieee80211_init_cipher_suites(local); if (result < 0) - goto fail_wiphy_register; + goto fail_workqueue; if (!local->ops->remain_on_channel) local->hw.wiphy->max_remain_on_channel_duration = 5000; @@ -1161,10 +1161,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM; - result = wiphy_register(local->hw.wiphy); - if (result < 0) - goto fail_wiphy_register; - /* * We use the number of queues for feature tests (QoS, HT) internally * so restrict them appropriately. @@ -1217,9 +1213,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_flows; rtnl_lock(); - result = ieee80211_init_rate_ctrl_alg(local, hw->rate_control_algorithm); + rtnl_unlock(); if (result < 0) { wiphy_debug(local->hw.wiphy, "Failed to initialize rate control algorithm\n"); @@ -1273,6 +1269,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->sband_allocated |= BIT(band); } + result = wiphy_register(local->hw.wiphy); + if (result < 0) + goto fail_wiphy_register; + + rtnl_lock(); + /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && !ieee80211_hw_check(hw, NO_AUTO_VIF)) { @@ -1312,17 +1314,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) #if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: #endif + wiphy_unregister(local->hw.wiphy); + fail_wiphy_register: rtnl_lock(); rate_control_deinitialize(local); ieee80211_remove_interfaces(local); - fail_rate: rtnl_unlock(); + fail_rate: fail_flows: ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: - wiphy_unregister(local->hw.wiphy); - fail_wiphy_register: if (local->wiphy_ciphers_allocated) kfree(local->hw.wiphy->cipher_suites); kfree(local->int_scan_req); @@ -1372,8 +1374,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) skb_queue_purge(&local->skb_queue_unreliable); skb_queue_purge(&local->skb_queue_tdls_chsw); - destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); + destroy_workqueue(local->workqueue); ieee80211_led_exit(local); kfree(local->int_scan_req); } -- cgit From 93e2d04a1888668183f3fb48666e90b9b31d29e6 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Sat, 28 Mar 2020 19:23:24 +0530 Subject: mac80211: fix channel switch trigger from unknown mesh peer Previously mesh channel switch happens if beacon contains CSA IE without checking the mesh peer info. Due to that channel switch happens even if the beacon is not from its own mesh peer. Fixing that by checking if the CSA originated from the same mesh network before proceeding for channel switch. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1585403604-29274-1-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d09b3c789314..36978a0e5000 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1257,15 +1257,15 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal) mesh_neighbour_update(sdata, mgmt->sa, &elems, rx_status); + + if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT && + !sdata->vif.csa_active) + ieee80211_mesh_process_chnswitch(sdata, &elems, true); } if (ifmsh->sync_ops) ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, &elems, rx_status); - - if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT && - !sdata->vif.csa_active) - ieee80211_mesh_process_chnswitch(sdata, &elems, true); } int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) @@ -1373,6 +1373,9 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - baselen, true, &elems, mgmt->bssid, NULL); + if (!mesh_matches_local(sdata, &elems)) + return; + ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false; -- cgit From 99e3a236dd43d06c65af0a2ef9cb44306aef6e02 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 14 Apr 2020 09:35:15 +0200 Subject: xsk: Add missing check on user supplied headroom size Add a check that the headroom cannot be larger than the available space in the chunk. In the current code, a malicious user can set the headroom to a value larger than the chunk size minus the fixed XDP headroom. That way packets with a length larger than the supported size in the umem could get accepted and result in an out-of-bounds write. Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt") Reported-by: Bui Quang Minh Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://bugzilla.kernel.org/show_bug.cgi?id=207225 Link: https://lore.kernel.org/bpf/1586849715-23490-1-git-send-email-magnus.karlsson@intel.com --- net/xdp/xdp_umem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index fa7bb5e060d0..ed7a6060f73c 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -343,7 +343,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) u32 chunk_size = mr->chunk_size, headroom = mr->headroom; unsigned int chunks, chunks_per_page; u64 addr = mr->addr, size = mr->len; - int size_chk, err; + int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { /* Strictly speaking we could support this, if: @@ -382,8 +382,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM; - if (size_chk < 0) + if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; umem->address = (unsigned long)addr; -- cgit From 25498a1969bf3687c29c29bbac92821d7a0f8b4a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Apr 2020 11:26:45 -0700 Subject: libbpf: Always specify expected_attach_type on program load if supported For some types of BPF programs that utilize expected_attach_type, libbpf won't set load_attr.expected_attach_type, even if expected_attach_type is known from section definition. This was done to preserve backwards compatibility with old kernels that didn't recognize expected_attach_type attribute yet (which was added in 5e43f899b03a ("bpf: Check attach type at prog load time"). But this is problematic for some BPF programs that utilize newer features that require kernel to know specific expected_attach_type (e.g., extended set of return codes for cgroup_skb/egress programs). This patch makes libbpf specify expected_attach_type by default, but also detect support for this field in kernel and not set it during program load. This allows to have a good metadata for bpf_program (e.g., bpf_program__get_extected_attach_type()), but still work with old kernels (for cases where it can work at all). Additionally, due to expected_attach_type being always set for recognized program types, bpf_program__attach_cgroup doesn't have to do extra checks to determine correct attach type, so remove that additional logic. Also adjust section_names selftest to account for this change. More detailed discussion can be found in [0]. [0] https://lore.kernel.org/bpf/20200412003604.GA15986@rdna-mbp.dhcp.thefacebook.com/ Fixes: 5cf1e9145630 ("bpf: cgroup inet skb programs can return 0 to 3") Fixes: 5e43f899b03a ("bpf: Check attach type at prog load time") Reported-by: Andrey Ignatov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/20200414182645.1368174-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 126 ++++++++++++++------- .../selftests/bpf/prog_tests/section_names.c | 42 ++++--- 2 files changed, 109 insertions(+), 59 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ff9174282a8c..8f480e29a6b0 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -178,6 +178,8 @@ struct bpf_capabilities { __u32 array_mmap:1; /* BTF_FUNC_GLOBAL is supported */ __u32 btf_func_global:1; + /* kernel support for expected_attach_type in BPF_PROG_LOAD */ + __u32 exp_attach_type:1; }; enum reloc_type { @@ -194,6 +196,22 @@ struct reloc_desc { int sym_off; }; +struct bpf_sec_def; + +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +struct bpf_sec_def { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + bool is_exp_attach_type_optional; + bool is_attachable; + bool is_attach_btf; + attach_fn_t attach_fn; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -204,6 +222,7 @@ struct bpf_program { char *name; int prog_ifindex; char *section_name; + const struct bpf_sec_def *sec_def; /* section_name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ @@ -3315,6 +3334,37 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj) return 0; } +static int +bpf_object__probe_exp_attach_type(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd; + + memset(&attr, 0, sizeof(attr)); + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + fd = bpf_load_program_xattr(&attr, NULL, 0); + if (fd >= 0) { + obj->caps.exp_attach_type = 1; + close(fd); + return 1; + } + return 0; +} + static int bpf_object__probe_caps(struct bpf_object *obj) { @@ -3325,6 +3375,7 @@ bpf_object__probe_caps(struct bpf_object *obj) bpf_object__probe_btf_func_global, bpf_object__probe_btf_datasec, bpf_object__probe_array_mmap, + bpf_object__probe_exp_attach_type, }; int i, ret; @@ -4861,7 +4912,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = prog->type; - load_attr.expected_attach_type = prog->expected_attach_type; + /* old kernels might not support specifying expected_attach_type */ + if (!prog->caps->exp_attach_type && prog->sec_def && + prog->sec_def->is_exp_attach_type_optional) + load_attr.expected_attach_type = 0; + else + load_attr.expected_attach_type = prog->expected_attach_type; if (prog->caps->name) load_attr.name = prog->name; load_attr.insns = insns; @@ -5062,6 +5118,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } +static const struct bpf_sec_def *find_sec_def(const char *sec_name); + static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) @@ -5117,24 +5175,17 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_object__elf_finish(obj); bpf_object__for_each_program(prog, obj) { - enum bpf_prog_type prog_type; - enum bpf_attach_type attach_type; - - if (prog->type != BPF_PROG_TYPE_UNSPEC) - continue; - - err = libbpf_prog_type_by_name(prog->section_name, &prog_type, - &attach_type); - if (err == -ESRCH) + prog->sec_def = find_sec_def(prog->section_name); + if (!prog->sec_def) /* couldn't guess, but user might manually specify */ continue; - if (err) - goto out; - bpf_program__set_type(prog, prog_type); - bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING || - prog_type == BPF_PROG_TYPE_EXT) + bpf_program__set_type(prog, prog->sec_def->prog_type); + bpf_program__set_expected_attach_type(prog, + prog->sec_def->expected_attach_type); + + if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || + prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); } @@ -6223,23 +6274,32 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \ - { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \ + attachable, attach_btf) \ + { \ + .sec = string, \ + .len = sizeof(string) - 1, \ + .prog_type = ptype, \ + .expected_attach_type = eatype, \ + .is_exp_attach_type_optional = eatype_optional, \ + .is_attachable = attachable, \ + .is_attach_btf = attach_btf, \ + } /* Programs that can NOT be attached. */ #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) /* Programs that can be attached. */ #define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype) + BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0) /* Programs that must specify expected attach type at load time. */ #define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype) + BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0) /* Programs that use BTF to identify attach point */ #define BPF_PROG_BTF(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0) + BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1) /* Programs that can be attached but attach type can't be identified by section * name. Kept for backward compatibility. @@ -6253,11 +6313,6 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, __VA_ARGS__ \ } -struct bpf_sec_def; - -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, - struct bpf_program *prog); - static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, struct bpf_program *prog); static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, @@ -6269,17 +6324,6 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, struct bpf_program *prog); -struct bpf_sec_def { - const char *sec; - size_t len; - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - bool is_attachable; - bool is_attach_btf; - enum bpf_attach_type attach_type; - attach_fn_t attach_fn; -}; - static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), @@ -6713,7 +6757,7 @@ int libbpf_attach_type_by_name(const char *name, continue; if (!section_defs[i].is_attachable) return -EINVAL; - *attach_type = section_defs[i].attach_type; + *attach_type = section_defs[i].expected_attach_type; return 0; } pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); @@ -7542,7 +7586,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, struct bpf_link * bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) { - const struct bpf_sec_def *sec_def; enum bpf_attach_type attach_type; char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; @@ -7561,11 +7604,6 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) link->detach = &bpf_link__detach_fd; attach_type = bpf_program__get_expected_attach_type(prog); - if (!attach_type) { - sec_def = find_sec_def(bpf_program__title(prog, false)); - if (sec_def) - attach_type = sec_def->attach_type; - } link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL); if (link_fd < 0) { link_fd = -errno; diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 9d9351dc2ded..713167449c98 100644 --- a/tools/testing/selftests/bpf/prog_tests/section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -43,18 +43,18 @@ static struct sec_name_test tests[] = { {"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} }, { "cgroup_skb/ingress", - {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, + {0, BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS}, {0, BPF_CGROUP_INET_INGRESS}, }, { "cgroup_skb/egress", - {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, + {0, BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_EGRESS}, {0, BPF_CGROUP_INET_EGRESS}, }, {"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} }, { "cgroup/sock", - {0, BPF_PROG_TYPE_CGROUP_SOCK, 0}, + {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE}, {0, BPF_CGROUP_INET_SOCK_CREATE}, }, { @@ -69,26 +69,38 @@ static struct sec_name_test tests[] = { }, { "cgroup/dev", - {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0}, + {0, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE}, {0, BPF_CGROUP_DEVICE}, }, - {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} }, + { + "sockops", + {0, BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS}, + {0, BPF_CGROUP_SOCK_OPS}, + }, { "sk_skb/stream_parser", - {0, BPF_PROG_TYPE_SK_SKB, 0}, + {0, BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_PARSER}, {0, BPF_SK_SKB_STREAM_PARSER}, }, { "sk_skb/stream_verdict", - {0, BPF_PROG_TYPE_SK_SKB, 0}, + {0, BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_VERDICT}, {0, BPF_SK_SKB_STREAM_VERDICT}, }, {"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} }, - {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} }, - {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} }, + { + "sk_msg", + {0, BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT}, + {0, BPF_SK_MSG_VERDICT}, + }, + { + "lirc_mode2", + {0, BPF_PROG_TYPE_LIRC_MODE2, BPF_LIRC_MODE2}, + {0, BPF_LIRC_MODE2}, + }, { "flow_dissector", - {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0}, + {0, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_FLOW_DISSECTOR}, {0, BPF_FLOW_DISSECTOR}, }, { @@ -158,17 +170,17 @@ static void test_prog_type_by_name(const struct sec_name_test *test) &expected_attach_type); CHECK(rc != test->expected_load.rc, "check_code", - "prog: unexpected rc=%d for %s", rc, test->sec_name); + "prog: unexpected rc=%d for %s\n", rc, test->sec_name); if (rc) return; CHECK(prog_type != test->expected_load.prog_type, "check_prog_type", - "prog: unexpected prog_type=%d for %s", + "prog: unexpected prog_type=%d for %s\n", prog_type, test->sec_name); CHECK(expected_attach_type != test->expected_load.expected_attach_type, - "check_attach_type", "prog: unexpected expected_attach_type=%d for %s", + "check_attach_type", "prog: unexpected expected_attach_type=%d for %s\n", expected_attach_type, test->sec_name); } @@ -180,13 +192,13 @@ static void test_attach_type_by_name(const struct sec_name_test *test) rc = libbpf_attach_type_by_name(test->sec_name, &attach_type); CHECK(rc != test->expected_attach.rc, "check_ret", - "attach: unexpected rc=%d for %s", rc, test->sec_name); + "attach: unexpected rc=%d for %s\n", rc, test->sec_name); if (rc) return; CHECK(attach_type != test->expected_attach.attach_type, - "check_attach_type", "attach: unexpected attach_type=%d for %s", + "check_attach_type", "attach: unexpected attach_type=%d for %s\n", attach_type, test->sec_name); } -- cgit From 49b452c382da2c2d1ccee1265cbb92da905c82f7 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 14 Apr 2020 16:50:24 +0200 Subject: libbpf: Fix type of old_fd in bpf_xdp_set_link_opts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'old_fd' parameter used for atomic replacement of XDP programs is supposed to be an FD, but was left as a u32 from an earlier iteration of the patch that added it. It was converted to an int when read, so things worked correctly even with negative values, but better change the definition to correctly reflect the intention. Fixes: bd5ca3ef93cd ("libbpf: Add function to set link XDP fd while specifying old program") Reported-by: David Ahern Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: David Ahern Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200414145025.182163-1-toke@redhat.com --- tools/lib/bpf/libbpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 44df1d3e7287..f1dacecb1619 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -458,7 +458,7 @@ struct xdp_link_info { struct bpf_xdp_set_link_opts { size_t sz; - __u32 old_fd; + int old_fd; }; #define bpf_xdp_set_link_opts__last_field old_fd -- cgit From c6c111523d9e697bfb463870759825be5d6caff6 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 14 Apr 2020 16:50:25 +0200 Subject: selftests/bpf: Check for correct program attach/detach in xdp_attach test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit David Ahern noticed that there was a bug in the EXPECTED_FD code so programs did not get detached properly when that parameter was supplied. This case was not included in the xdp_attach tests; so let's add it to be sure that such a bug does not sneak back in down. Fixes: 87854a0b57b3 ("selftests/bpf: Add tests for attaching XDP programs") Reported-by: David Ahern Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200414145025.182163-2-toke@redhat.com --- .../testing/selftests/bpf/prog_tests/xdp_attach.c | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index 05b294d6b923..15ef3531483e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -6,19 +6,34 @@ void test_xdp_attach(void) { + __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; const char *file = "./test_xdp.o"; + struct bpf_prog_info info = {}; int err, fd1, fd2, fd3; - __u32 duration = 0; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); + len = sizeof(info); + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); if (CHECK_FAIL(err)) return; + err = bpf_obj_get_info_by_fd(fd1, &info, &len); + if (CHECK_FAIL(err)) + goto out_1; + id1 = info.id; + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); if (CHECK_FAIL(err)) goto out_1; + + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(fd2, &info, &len); + if (CHECK_FAIL(err)) + goto out_2; + id2 = info.id; + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); if (CHECK_FAIL(err)) goto out_2; @@ -28,6 +43,11 @@ void test_xdp_attach(void) if (CHECK(err, "load_ok", "initial load failed")) goto out_close; + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != id1, "id1_check", + "loaded prog id %u != id1 %u, err %d", id0, id1, err)) + goto out_close; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts); if (CHECK(!err, "load_fail", "load with expected id didn't fail")) @@ -37,6 +57,10 @@ void test_xdp_attach(void) err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts); if (CHECK(err, "replace_ok", "replace valid old_fd failed")) goto out; + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != id2, "id2_check", + "loaded prog id %u != id2 %u, err %d", id0, id2, err)) + goto out_close; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts); if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail")) @@ -51,6 +75,10 @@ void test_xdp_attach(void) if (CHECK(err, "remove_ok", "remove valid old_fd failed")) goto out; + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); + if (CHECK(err || id0 != 0, "unload_check", + "loaded prog id %u != 0, err %d", id0, err)) + goto out_close; out: bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); out_close: -- cgit From 87b0f983f66f23762921129fd35966eddc3f2dae Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Apr 2020 22:36:15 +0300 Subject: net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 5 +-- drivers/net/ethernet/mscc/ocelot.c | 84 +++++++++++++++++++------------------- drivers/net/ethernet/mscc/ocelot.h | 2 - include/soc/mscc/ocelot.h | 4 +- 4 files changed, 47 insertions(+), 48 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 79ca3aadb864..d0a3764ff0cf 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -46,11 +46,8 @@ static int felix_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { struct ocelot *ocelot = ds->priv; - bool vlan_aware; - vlan_aware = dsa_port_is_vlan_filtering(dsa_to_port(ds, port)); - - return ocelot_fdb_add(ocelot, port, addr, vid, vlan_aware); + return ocelot_fdb_add(ocelot, port, addr, vid); } static int felix_fdb_del(struct dsa_switch *ds, int port, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index b4731df186f4..a8c48a4a708f 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -183,44 +183,47 @@ static void ocelot_vlan_mode(struct ocelot *ocelot, int port, ocelot_write(ocelot, val, ANA_VLANMASK); } -void ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, - bool vlan_aware) +static int ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, + u16 vid) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - u32 val; + u32 val = 0; - if (vlan_aware) - val = ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | - ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1); - else - val = 0; - ocelot_rmw_gix(ocelot, val, - ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | - ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, - ANA_PORT_VLAN_CFG, port); + if (ocelot_port->vid != vid) { + /* Always permit deleting the native VLAN (vid = 0) */ + if (ocelot_port->vid && vid) { + dev_err(ocelot->dev, + "Port already has a native VLAN: %d\n", + ocelot_port->vid); + return -EBUSY; + } + ocelot_port->vid = vid; + } + + ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_VID(vid), + REW_PORT_VLAN_CFG_PORT_VID_M, + REW_PORT_VLAN_CFG, port); - if (vlan_aware && !ocelot_port->vid) + if (ocelot_port->vlan_aware && !ocelot_port->vid) /* If port is vlan-aware and tagged, drop untagged and priority * tagged frames. */ val = ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; - else - val = 0; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, ANA_PORT_DROP_CFG, port); - if (vlan_aware) { + if (ocelot_port->vlan_aware) { if (ocelot_port->vid) /* Tag all frames except when VID == DEFAULT_VLAN */ - val |= REW_TAG_CFG_TAG_CFG(1); + val = REW_TAG_CFG_TAG_CFG(1); else /* Tag all frames */ - val |= REW_TAG_CFG_TAG_CFG(3); + val = REW_TAG_CFG_TAG_CFG(3); } else { /* Port tagging disabled. */ val = REW_TAG_CFG_TAG_CFG(0); @@ -228,31 +231,31 @@ void ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ocelot_rmw_gix(ocelot, val, REW_TAG_CFG_TAG_CFG_M, REW_TAG_CFG, port); + + return 0; } -EXPORT_SYMBOL(ocelot_port_vlan_filtering); -static int ocelot_port_set_native_vlan(struct ocelot *ocelot, int port, - u16 vid) +void ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, + bool vlan_aware) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + u32 val; - if (ocelot_port->vid != vid) { - /* Always permit deleting the native VLAN (vid = 0) */ - if (ocelot_port->vid && vid) { - dev_err(ocelot->dev, - "Port already has a native VLAN: %d\n", - ocelot_port->vid); - return -EBUSY; - } - ocelot_port->vid = vid; - } + ocelot_port->vlan_aware = vlan_aware; - ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_VID(vid), - REW_PORT_VLAN_CFG_PORT_VID_M, - REW_PORT_VLAN_CFG, port); + if (vlan_aware) + val = ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | + ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1); + else + val = 0; + ocelot_rmw_gix(ocelot, val, + ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA | + ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, + ANA_PORT_VLAN_CFG, port); - return 0; + ocelot_port_set_native_vlan(ocelot, port, ocelot_port->vid); } +EXPORT_SYMBOL(ocelot_port_vlan_filtering); /* Default vlan to clasify for untagged frames (may be zero) */ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, u16 pvid) @@ -873,12 +876,12 @@ static void ocelot_get_stats64(struct net_device *dev, } int ocelot_fdb_add(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid, bool vlan_aware) + const unsigned char *addr, u16 vid) { struct ocelot_port *ocelot_port = ocelot->ports[port]; if (!vid) { - if (!vlan_aware) + if (!ocelot_port->vlan_aware) /* If the bridge is not VLAN aware and no VID was * provided, set it to pvid to ensure the MAC entry * matches incoming untagged packets @@ -905,7 +908,7 @@ static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct ocelot *ocelot = priv->port.ocelot; int port = priv->chip_port; - return ocelot_fdb_add(ocelot, port, addr, vid, priv->vlan_aware); + return ocelot_fdb_add(ocelot, port, addr, vid); } int ocelot_fdb_del(struct ocelot *ocelot, int port, @@ -1496,8 +1499,8 @@ static int ocelot_port_attr_set(struct net_device *dev, ocelot_port_attr_ageing_set(ocelot, port, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - priv->vlan_aware = attr->u.vlan_filtering; - ocelot_port_vlan_filtering(ocelot, port, priv->vlan_aware); + ocelot_port_vlan_filtering(ocelot, port, + attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: ocelot_port_attr_mc_set(ocelot, port, !attr->u.mc_disabled); @@ -1868,7 +1871,6 @@ static int ocelot_netdevice_port_event(struct net_device *dev, } else { err = ocelot_port_bridge_leave(ocelot, port, info->upper_dev); - priv->vlan_aware = false; } } if (netif_is_lag_master(info->upper_dev)) { diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index e34ef8380eb3..641af929497f 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -56,8 +56,6 @@ struct ocelot_port_private { struct phy_device *phy; u8 chip_port; - u8 vlan_aware; - struct phy *serdes; struct ocelot_port_tc tc; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ebffcb36a7e3..6d6a3947c8b7 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -476,6 +476,8 @@ struct ocelot_port { void __iomem *regs; + bool vlan_aware; + /* Ingress default VLAN (pvid) */ u16 pvid; @@ -610,7 +612,7 @@ int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, int ocelot_fdb_dump(struct ocelot *ocelot, int port, dsa_fdb_dump_cb_t *cb, void *data); int ocelot_fdb_add(struct ocelot *ocelot, int port, - const unsigned char *addr, u16 vid, bool vlan_aware); + const unsigned char *addr, u16 vid); int ocelot_fdb_del(struct ocelot *ocelot, int port, const unsigned char *addr, u16 vid); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, -- cgit From 7dba92037baf3fa00b4880a31fd532542264994c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 14 Apr 2020 20:02:07 -0300 Subject: net/rds: Use ERR_PTR for rds_message_alloc_sgs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning the error code via a 'int *ret' when the function returns a pointer is very un-kernely and causes gcc 10's static analysis to choke: net/rds/message.c: In function ‘rds_message_map_pages’: net/rds/message.c:358:10: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized] 358 | return ERR_PTR(ret); Use a typical ERR_PTR return instead. Signed-off-by: Jason Gunthorpe Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/message.c | 19 ++++++------------- net/rds/rdma.c | 12 ++++++++---- net/rds/rds.h | 3 +-- net/rds/send.c | 6 ++++-- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index bbecb8cb873e..071a261fdaab 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -308,26 +308,20 @@ out: /* * RDS ops use this to grab SG entries from the rm's sg pool. */ -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, - int *ret) +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) { struct scatterlist *sg_first = (struct scatterlist *) &rm[1]; struct scatterlist *sg_ret; - if (WARN_ON(!ret)) - return NULL; - if (nents <= 0) { pr_warn("rds: alloc sgs failed! nents <= 0\n"); - *ret = -EINVAL; - return NULL; + return ERR_PTR(-EINVAL); } if (rm->m_used_sgs + nents > rm->m_total_sgs) { pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n", rm->m_total_sgs, rm->m_used_sgs, nents); - *ret = -ENOMEM; - return NULL; + return ERR_PTR(-ENOMEM); } sg_ret = &sg_first[rm->m_used_sgs]; @@ -343,7 +337,6 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in unsigned int i; int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); int extra_bytes = num_sgs * sizeof(struct scatterlist); - int ret; rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); if (!rm) @@ -352,10 +345,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); - if (!rm->data.op_sg) { + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + if (IS_ERR(rm->data.op_sg)) { rds_message_put(rm); - return ERR_PTR(ret); + return ERR_CAST(rm->data.op_sg); } for (i = 0; i < rm->data.op_nents; ++i) { diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 113e442101ce..a7ae11846cd7 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -665,9 +665,11 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, op->op_odp_mr = NULL; WARN_ON(!nr_pages); - op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret); - if (!op->op_sg) + op->op_sg = rds_message_alloc_sgs(rm, nr_pages); + if (IS_ERR(op->op_sg)) { + ret = PTR_ERR(op->op_sg); goto out_pages; + } if (op->op_notify || op->op_recverr) { /* We allocate an uninitialized notifier here, because @@ -906,9 +908,11 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT); rm->atomic.op_active = 1; rm->atomic.op_recverr = rs->rs_recverr; - rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1, &ret); - if (!rm->atomic.op_sg) + rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); + if (IS_ERR(rm->atomic.op_sg)) { + ret = PTR_ERR(rm->atomic.op_sg); goto err; + } /* verify 8 byte-aligned */ if (args->local_addr & 0x7) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 8e18cd2aec51..6019b0c004a9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -844,8 +844,7 @@ rds_conn_connecting(struct rds_connection *conn) /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); -struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents, - int *ret); +struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); diff --git a/net/rds/send.c b/net/rds/send.c index 82dcd8b84fe7..68e2bdb08fd0 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1274,9 +1274,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs, &ret); - if (!rm->data.op_sg) + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + if (IS_ERR(rm->data.op_sg)) { + ret = PTR_ERR(rm->data.op_sg); goto out; + } ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy); if (ret) goto out; -- cgit From 672e24772aeb45293c86f6176520d98b19cd48a1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 16 Apr 2020 00:16:30 +0100 Subject: ipv6: remove redundant assignment to variable err The variable err is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/ipv6/seg6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 75421a472d25..4c7e0a27fa9c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -434,7 +434,7 @@ static struct genl_family seg6_genl_family __ro_after_init = { int __init seg6_init(void) { - int err = -ENOMEM; + int err; err = genl_register_family(&seg6_genl_family); if (err) -- cgit From c8322754642052b3580db8bc3c33fd671a41cdd6 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 15 Apr 2020 22:01:49 +0200 Subject: dt-bindings: net: ethernet-phy: add desciption for ethernet-phy-id1234.d400 The description below is already in use in 'rk3228-evb.dts', 'rk3229-xms6.dts' and 'rk3328.dtsi' but somehow never added to a document, so add "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22" for ethernet-phy nodes on Rockchip platforms to 'ethernet-phy.yaml'. Signed-off-by: Johan Jonker Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet-phy.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 8927941c74bb..5aa141ccc113 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -43,6 +43,9 @@ properties: second group of digits is the Phy Identifier 2 register, this is the chip vendor OUI bits 19:24, followed by 10 bits of a vendor specific ID. + - items: + - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$" + - const: ethernet-phy-ieee802.3-c22 - items: - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$" - const: ethernet-phy-ieee802.3-c45 -- cgit From ae5a44bb970ad8d0f7382cf3fc9738787e3cf19f Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 16:42:48 +0800 Subject: net: tulip: make early_486_chipsets static Fix the following sparse warning: drivers/net/ethernet/dec/tulip/tulip_core.c:1280:28: warning: symbol 'early_486_chipsets' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 48ea658aa1a6..15efc294f513 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1277,7 +1277,7 @@ static const struct net_device_ops tulip_netdev_ops = { #endif }; -const struct pci_device_id early_486_chipsets[] = { +static const struct pci_device_id early_486_chipsets[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424) }, { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496) }, { }, -- cgit From 5309960e49f5e2363d2814488878a29e944e1be9 Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Wed, 15 Apr 2020 17:54:04 +0800 Subject: Documentation: Fix tcp_challenge_ack_limit default value The default value of tcp_challenge_ack_limit has been changed from 100 to 1000 and this patch fixes its documentation. Signed-off-by: Cambda Zhu Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ee961d322d93..6fcfd313dbe4 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -812,7 +812,7 @@ tcp_limit_output_bytes - INTEGER tcp_challenge_ack_limit - INTEGER Limits number of Challenge ACK sent per second, as recommended in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) - Default: 100 + Default: 1000 tcp_rx_skb_cache - BOOLEAN Controls a per TCP socket cache of one skb, that might help -- cgit From edadedf1c5b4e4404192a0a4c3c0c05e3b7672ab Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Wed, 15 Apr 2020 18:34:49 +0700 Subject: tipc: fix incorrect increasing of link window In commit 16ad3f4022bb ("tipc: introduce variable window congestion control"), we allow link window to change with the congestion avoidance algorithm. However, there is a bug that during the slow-start if packet retransmission occurs, the link will enter the fast-recovery phase, set its window to the 'ssthresh' which is never less than 300, so the link window suddenly increases to that limit instead of decreasing. Consequently, two issues have been observed: - For broadcast-link: it can leave a gap between the link queues that a new packet will be inserted and sent before the previous ones, i.e. not in-order. - For unicast: the algorithm does not work as expected, the link window jumps to the slow-start threshold whereas packet retransmission occurs. This commit fixes the issues by avoiding such the link window increase, but still decreasing if the 'ssthresh' is lowered. Fixes: 16ad3f4022bb ("tipc: introduce variable window congestion control") Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 467c53a1fb5c..d4675e922a8f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1065,7 +1065,7 @@ static void tipc_link_update_cwin(struct tipc_link *l, int released, /* Enter fast recovery */ if (unlikely(retransmitted)) { l->ssthresh = max_t(u16, l->window / 2, 300); - l->window = l->ssthresh; + l->window = min_t(u16, l->ssthresh, l->window); return; } /* Enter slow start */ -- cgit From e045124e93995fe01e42ed530003ddba5d55db4f Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Tue, 14 Apr 2020 14:34:08 +0800 Subject: net: dsa: mt7530: fix tagged frames pass-through in VLAN-unaware mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In VLAN-unaware mode, the Egress Tag (EG_TAG) field in Port VLAN Control register must be set to Consistent to let tagged frames pass through as is, otherwise their tags will be stripped. Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530") Signed-off-by: DENG Qingfang Reviewed-by: Florian Fainelli Tested-by: René van Dorst Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 18 ++++++++++++------ drivers/net/dsa/mt7530.h | 7 +++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 84391c8a0e16..5c444cd722bd 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -773,8 +773,9 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) */ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_MATRIX_MODE); - mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK, - VLAN_ATTR(MT7530_VLAN_TRANSPARENT)); + mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, + VLAN_ATTR(MT7530_VLAN_TRANSPARENT) | + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); for (i = 0; i < MT7530_NUM_PORTS; i++) { if (dsa_is_user_port(ds, i) && @@ -790,8 +791,8 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) if (all_user_ports_removed) { mt7530_write(priv, MT7530_PCR_P(MT7530_CPU_PORT), PCR_MATRIX(dsa_user_ports(priv->ds))); - mt7530_write(priv, MT7530_PVC_P(MT7530_CPU_PORT), - PORT_SPEC_TAG); + mt7530_write(priv, MT7530_PVC_P(MT7530_CPU_PORT), PORT_SPEC_TAG + | PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } } @@ -817,8 +818,9 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) /* Set the port as a user port which is to be able to recognize VID * from incoming packets before fetching entry within the VLAN table. */ - mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK, - VLAN_ATTR(MT7530_VLAN_USER)); + mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, + VLAN_ATTR(MT7530_VLAN_USER) | + PVC_EG_TAG(MT7530_VLAN_EG_DISABLED)); } static void @@ -1303,6 +1305,10 @@ mt7530_setup(struct dsa_switch *ds) mt7530_cpu_port_enable(priv, i); else mt7530_port_disable(ds, i); + + /* Enable consistent egress tag */ + mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } /* Setup port 5 */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 4aef6024441b..979bb6374678 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -172,9 +172,16 @@ enum mt7530_port_mode { /* Register for port vlan control */ #define MT7530_PVC_P(x) (0x2010 + ((x) * 0x100)) #define PORT_SPEC_TAG BIT(5) +#define PVC_EG_TAG(x) (((x) & 0x7) << 8) +#define PVC_EG_TAG_MASK PVC_EG_TAG(7) #define VLAN_ATTR(x) (((x) & 0x3) << 6) #define VLAN_ATTR_MASK VLAN_ATTR(3) +enum mt7530_vlan_port_eg_tag { + MT7530_VLAN_EG_DISABLED = 0, + MT7530_VLAN_EG_CONSISTENT = 1, +}; + enum mt7530_vlan_port_attr { MT7530_VLAN_USER = 0, MT7530_VLAN_TRANSPARENT = 3, -- cgit From 806fd188ce2a4f8b587e83e73c478e6484fbfa55 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 14 Apr 2020 15:39:52 -0700 Subject: net: stmmac: dwmac-sunxi: Provide TX and RX fifo sizes After commit bfcb813203e619a8960a819bf533ad2a108d8105 ("net: dsa: configure the MTU for switch ports") my Lamobo R1 platform which uses an allwinner,sun7i-a20-gmac compatible Ethernet MAC started to fail by rejecting a MTU of 1536. The reason for that is that the DMA capabilities are not readable on this version of the IP, and there is also no 'tx-fifo-depth' property being provided in Device Tree. The property is documented as optional, and is not provided. Chen-Yu indicated that the FIFO sizes are 4KB for TX and 16KB for RX, so provide these values through platform data as an immediate fix until various Device Tree sources get updated accordingly. Fixes: eaf4fac47807 ("net: stmmac: Do not accept invalid MTU values") Suggested-by: Chen-Yu Tsai Signed-off-by: Florian Fainelli Acked-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 7d40760e9ba8..0e1ca2cba3c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -150,6 +150,8 @@ static int sun7i_gmac_probe(struct platform_device *pdev) plat_dat->init = sun7i_gmac_init; plat_dat->exit = sun7i_gmac_exit; plat_dat->fix_mac_speed = sun7i_fix_speed; + plat_dat->tx_fifo_size = 4096; + plat_dat->rx_fifo_size = 16384; ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv); if (ret) -- cgit From 05eab4f328bb127de37c1d619013c340cc5aaf39 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 15 Apr 2020 16:42:26 +0800 Subject: mISDN: make dmril and dmrim static Fix the following sparse warning: drivers/isdn/hardware/mISDN/mISDNisar.c:746:12: warning: symbol 'dmril' was not declared. Should it be static? drivers/isdn/hardware/mISDN/mISDNisar.c:749:12: warning: symbol 'dmrim' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/mISDNisar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c index e325e87c0593..11e8c7d8b6e8 100644 --- a/drivers/isdn/hardware/mISDN/mISDNisar.c +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -743,10 +743,10 @@ check_send(struct isar_hw *isar, u8 rdm) } } -const char *dmril[] = {"NO SPEED", "1200/75", "NODEF2", "75/1200", "NODEF4", +static const char *dmril[] = {"NO SPEED", "1200/75", "NODEF2", "75/1200", "NODEF4", "300", "600", "1200", "2400", "4800", "7200", "9600nt", "9600t", "12000", "14400", "WRONG"}; -const char *dmrim[] = {"NO MOD", "NO DEF", "V32/V32b", "V22", "V21", +static const char *dmrim[] = {"NO MOD", "NO DEF", "V32/V32b", "V22", "V21", "Bell103", "V23", "Bell202", "V17", "V29", "V27ter"}; static void -- cgit From d518691cbd3be3dae218e05cca3f3fc9b2f1aa77 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 16 Apr 2020 17:57:40 +0200 Subject: amd-xgbe: Use __napi_schedule() in BH context The driver uses __napi_schedule_irqoff() which is fine as long as it is invoked with disabled interrupts by everybody. Since the commit mentioned below the driver may invoke xgbe_isr_task() in tasklet/softirq context. This may lead to list corruption if another driver uses __napi_schedule_irqoff() in IRQ context. Use __napi_schedule() which safe to use from IRQ and softirq context. Fixes: 85b85c853401d ("amd-xgbe: Re-issue interrupt if interrupt status not cleared") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tom Lendacky Cc: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index b71f9b04a51e..a87264f95f1a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -514,7 +514,7 @@ static void xgbe_isr_task(unsigned long data) xgbe_disable_rx_tx_ints(pdata); /* Turn on polling */ - __napi_schedule_irqoff(&pdata->napi); + __napi_schedule(&pdata->napi); } } else { /* Don't clear Rx/Tx status if doing per channel DMA -- cgit