diff options
741 files changed, 18586 insertions, 11099 deletions
@@ -191,6 +191,10 @@ Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com> Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com> Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com> Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com> +Geliang Tang <geliang.tang@linux.dev> <geliang.tang@suse.com> +Geliang Tang <geliang.tang@linux.dev> <geliangtang@xiaomi.com> +Geliang Tang <geliang.tang@linux.dev> <geliangtang@gmail.com> +Geliang Tang <geliang.tang@linux.dev> <geliangtang@163.com> Georgi Djakov <djakov@kernel.org> <georgi.djakov@linaro.org> Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com> Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com> diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml index 73674baea75d..f9160d7bac3c 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml @@ -42,6 +42,8 @@ properties: - lg,acx467akm-7 # LG Corporation 7" WXGA TFT LCD panel - lg,ld070wx3-sl01 + # LG Corporation 5" HD TFT LCD panel + - lg,lh500wx1-sd03 # One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel - osddisplays,osd101t2587-53ts # Panasonic 10" WUXGA TFT LCD panel diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index 3ec9ee95045f..11422af3477e 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -208,8 +208,6 @@ properties: - lemaker,bl035-rgb-002 # LG 7" (800x480 pixels) TFT LCD panel - lg,lb070wv8 - # LG Corporation 5" HD TFT LCD panel - - lg,lh500wx1-sd03 # LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel - lg,lp079qx1-sp0v # LG 9.7" (2048x1536 pixels) TFT LCD panel diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index d3306b186000..890f7858d0dc 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -58,6 +58,7 @@ properties: - renesas,r9a07g043-gbeth # RZ/G2UL and RZ/Five - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - renesas,r9a07g054-gbeth # RZ/V2L + - renesas,r9a08g045-gbeth # RZ/G3S - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family reg: true diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index c3a438197964..cf6eaa0da821 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -2254,3 +2254,14 @@ operations: - bus-name - dev-name - selftests + + - + name: notify-filter-set + doc: Set notification messages socket filter. + attribute-set: devlink + do: + request: + attributes: + - bus-name + - dev-name + - port-index diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst index 5eaa3ab6c73e..b842bcb14255 100644 --- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst +++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -54,6 +54,7 @@ ena_common_defs.h Common definitions for ena_com layer. ena_regs_defs.h Definition of ENA PCI memory-mapped (MMIO) registers. ena_netdev.[ch] Main Linux kernel driver. ena_ethtool.c ethtool callbacks. +ena_xdp.[ch] XDP files ena_pci_id_tbl.h Supported device IDs. ================= ====================================================== diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 6cab1b797739..e75a53593bb9 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -38,7 +38,7 @@ netdev_features_t wanted_features netdev_features_t vlan_features netdev_features_t hw_enc_features - - netif_skb_features netdev_features_t mpls_features -netdev_features_t gso_partial_features +netdev_features_t gso_partial_features read_mostly gso_features_check unsigned_int min_mtu unsigned_int max_mtu unsigned_short type @@ -96,7 +96,7 @@ unsigned_char* dev_addr struct_netdev_queue* _rx read_mostly - netdev_get_rx_queue(rx) unsigned_int num_rx_queues unsigned_int real_num_rx_queues - read_mostly get_rps_cpu -struct_bpf_prog* xdp_prog +struct_bpf_prog* xdp_prog - read_mostly netif_elide_gro() unsigned_long gro_flush_timeout - read_mostly napi_complete_done int napi_defer_hard_irqs - read_mostly napi_complete_done unsigned_int gro_max_size - read_mostly skb_gro_receive diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 335b53df35e2..a8a1aff6445e 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,3 +1,4 @@ # jinja2>=3.1 is not compatible with Sphinx<4.0 jinja2<3.1 Sphinx==2.4.4 +pyyaml diff --git a/MAINTAINERS b/MAINTAINERS index dda78b4ce707..2b916990d7f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6050,10 +6050,8 @@ M: Mikulas Patocka <mpatocka@redhat.com> M: dm-devel@lists.linux.dev L: dm-devel@lists.linux.dev S: Maintained -W: http://sources.redhat.com/dm Q: http://patchwork.kernel.org/project/dm-devel/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git -T: quilt http://people.redhat.com/agk/patches/linux/editing/ F: Documentation/admin-guide/device-mapper/ F: drivers/md/Kconfig F: drivers/md/Makefile @@ -9534,6 +9532,7 @@ F: drivers/bus/hisi_lpc.c HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3) M: Yisen Zhuang <yisen.zhuang@huawei.com> M: Salil Mehta <salil.mehta@huawei.com> +M: Jijie Shao <shaojijie@huawei.com> L: netdev@vger.kernel.org S: Maintained W: http://www.hisilicon.com @@ -12201,6 +12200,8 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Michael Ellerman <mpe@ellerman.id.au> R: Nicholas Piggin <npiggin@gmail.com> R: Christophe Leroy <christophe.leroy@csgroup.eu> +R: Aneesh Kumar K.V <aneesh.kumar@kernel.org> +R: Naveen N. Rao <naveen.n.rao@linux.ibm.com> L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki @@ -15443,7 +15444,7 @@ NXP C45 TJA11XX PHY DRIVER M: Radu Pirea <radu-nicolae.pirea@oss.nxp.com> L: netdev@vger.kernel.org S: Maintained -F: drivers/net/phy/nxp-c45-tja11xx.c +F: drivers/net/phy/nxp-c45-tja11xx* NXP FSPI DRIVER M: Han Xu <han.xu@nxp.com> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 3162db540ee9..1b0483c51cc1 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,7 +49,6 @@ config ARC select OF select OF_EARLY_FLATTREE select PCI_SYSCALL if PCI - select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select TRACE_IRQFLAGS_SUPPORT @@ -232,10 +231,6 @@ config ARC_CACHE_PAGES Note that Global I/D ENABLE + Per Page DISABLE works but corollary Global DISABLE + Per Page ENABLE won't work -config ARC_CACHE_VIPT_ALIASING - bool "Support VIPT Aliasing D$" - depends on ARC_HAS_DCACHE && ISA_ARCOMPACT - endif #ARC_CACHE config ARC_HAS_ICCM diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index bd5b1a9a0544..563af3e75f01 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -44,31 +44,10 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz); #define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ -#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING - #define flush_cache_mm(mm) /* called on munmap/exit */ #define flush_cache_range(mm, u_vstart, u_vend) #define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */ -#else /* VIPT aliasing dcache */ - -/* To clear out stale userspace mappings */ -void flush_cache_mm(struct mm_struct *mm); -void flush_cache_range(struct vm_area_struct *vma, - unsigned long start,unsigned long end); -void flush_cache_page(struct vm_area_struct *vma, - unsigned long user_addr, unsigned long page); - -/* - * To make sure that userspace mapping is flushed to memory before - * get_user_pages() uses a kernel mapping to access the page - */ -#define ARCH_HAS_FLUSH_ANON_PAGE -void flush_anon_page(struct vm_area_struct *vma, - struct page *page, unsigned long u_vaddr); - -#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */ - /* * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default * This works around some PIO based drivers which don't call flush_dcache_page @@ -76,28 +55,6 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 -#define CACHE_COLORS_NUM 4 -#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) - -/* - * Simple wrapper over config option - * Bootup code ensures that hardware matches kernel configuration - */ -static inline int cache_is_vipt_aliasing(void) -{ - return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); -} - -/* - * checks if two addresses (after page aligning) index into same cache set - */ -#define addr_not_cache_congruent(addr1, addr2) \ -({ \ - cache_is_vipt_aliasing() ? \ - (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \ -}) - #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 4d13320e0c1b..3802a2daaf86 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -291,4 +291,36 @@ /* M = 8-1 N = 8 */ .endm +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + #endif diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index a0e760eb35a8..92c3e9f13252 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -33,6 +33,91 @@ #include <asm/irqflags-compact.h> #include <asm/thread_info.h> /* For THREAD_SIZE */ +/* Note on the LD/ST addr modes with addr reg wback + * + * LD.a same as LD.aw + * + * LD.a reg1, [reg2, x] => Pre Incr + * Eff Addr for load = [reg2 + x] + * + * LD.ab reg1, [reg2, x] => Post Incr + * Eff Addr for load = [reg2] + */ + +.macro PUSHAX aux + lr r9, [\aux] + push r9 +.endm + +.macro POPAX aux + pop r9 + sr r9, [\aux] +.endm + +.macro SAVE_R0_TO_R12 + push r0 + push r1 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 +.endm + +.macro RESTORE_R12_TO_R0 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r1 + pop r0 +.endm + +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + /*-------------------------------------------------------------- * Switch to Kernel Mode stack if SP points to User Mode stack * @@ -235,7 +320,7 @@ SWITCH_TO_KERNEL_STK - PUSH 0x003\LVL\()abcd /* Dummy ECR */ + st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */ sub sp, sp, 8 /* skip orig_r0 (not needed) skip pt_regs->sp, already saved above */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 49c2e090cb5c..cf1ba376e992 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -21,114 +21,12 @@ #include <asm/entry-arcv2.h> #endif -/* Note on the LD/ST addr modes with addr reg wback - * - * LD.a same as LD.aw - * - * LD.a reg1, [reg2, x] => Pre Incr - * Eff Addr for load = [reg2 + x] - * - * LD.ab reg1, [reg2, x] => Post Incr - * Eff Addr for load = [reg2] - */ - -.macro PUSH reg - st.a \reg, [sp, -4] -.endm - -.macro PUSHAX aux - lr r9, [\aux] - PUSH r9 -.endm - -.macro POP reg - ld.ab \reg, [sp, 4] -.endm - -.macro POPAX aux - POP r9 - sr r9, [\aux] -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore Scratch Regs: - * used by Interrupt/Exception Prologue/Epilogue - *-------------------------------------------------------------*/ -.macro SAVE_R0_TO_R12 - PUSH r0 - PUSH r1 - PUSH r2 - PUSH r3 - PUSH r4 - PUSH r5 - PUSH r6 - PUSH r7 - PUSH r8 - PUSH r9 - PUSH r10 - PUSH r11 - PUSH r12 -.endm - -.macro RESTORE_R12_TO_R0 - POP r12 - POP r11 - POP r10 - POP r9 - POP r8 - POP r7 - POP r6 - POP r5 - POP r4 - POP r3 - POP r2 - POP r1 - POP r0 - -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore callee-saved regs: - * used by several macros below - *-------------------------------------------------------------*/ -.macro SAVE_R13_TO_R25 - PUSH r13 - PUSH r14 - PUSH r15 - PUSH r16 - PUSH r17 - PUSH r18 - PUSH r19 - PUSH r20 - PUSH r21 - PUSH r22 - PUSH r23 - PUSH r24 - PUSH r25 -.endm - -.macro RESTORE_R25_TO_R13 - POP r25 - POP r24 - POP r23 - POP r22 - POP r21 - POP r20 - POP r19 - POP r18 - POP r17 - POP r16 - POP r15 - POP r14 - POP r13 -.endm - /* * save user mode callee regs as struct callee_regs * - needed by fork/do_signal/unaligned-access-emulation. */ .macro SAVE_CALLEE_SAVED_USER - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm /* @@ -136,18 +34,18 @@ * - could have been changed by ptrace tracer or unaligned-access fixup */ .macro RESTORE_CALLEE_SAVED_USER - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /* * save/restore kernel mode callee regs at the time of context switch */ .macro SAVE_CALLEE_SAVED_KERNEL - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm .macro RESTORE_CALLEE_SAVED_KERNEL - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index ef8d4166370c..8a2441670a8f 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -10,6 +10,13 @@ #include <linux/types.h> #include <asm-generic/pgtable-nopmd.h> +/* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 4a2b30fb5a98..00b9318e551e 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -54,6 +54,10 @@ struct pt_regs { ecr_reg ecr; }; +struct callee_regs { + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; +}; + #define MAX_REG_OFFSET offsetof(struct pt_regs, ecr) #else @@ -92,16 +96,14 @@ struct pt_regs { unsigned long status32; }; -#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) - -#endif - -/* Callee saved registers - need to be saved only when you are scheduled out */ - struct callee_regs { unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) + +#endif + #define instruction_pointer(regs) ((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 4dcf8589b708..d08a5092c2b4 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -153,7 +153,7 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) { int n = 0; #ifdef CONFIG_ISA_ARCV2 - const char *release, *cpu_nm, *isa_nm = "ARCv2"; + const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2"; int dual_issue = 0, dual_enb = 0, mpy_opt, present; int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk; char mpy_nm[16], lpb_nm[32]; @@ -172,8 +172,6 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) * releases only update it. */ - cpu_nm = "HS38"; - if (info->arcver > 0x50 && info->arcver <= 0x53) { release = arc_hs_rel[info->arcver - 0x51].str; } else { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 0b3bb529d246..8f6f4a542964 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -62,7 +62,7 @@ struct rt_sigframe { unsigned int sigret_magic; }; -static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT @@ -75,12 +75,12 @@ static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) #else v2abi.r58 = v2abi.r59 = 0; #endif - err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); + err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi)); #endif return err; } -static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f7e05c146637..9106ceac323c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -145,10 +145,9 @@ dc_chk: p_dc->sz_k = 1 << (dbcr.sz - 1); n += scnprintf(buf + n, len - n, - "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", + "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n", p_dc->sz_k, assoc, p_dc->line_len, vipt ? "VIPT" : "PIPT", - p_dc->colors > 1 ? " aliasing" : "", IS_USED_CFG(CONFIG_ARC_HAS_DCACHE)); slc_chk: @@ -703,51 +702,10 @@ static inline void arc_slc_enable(void) * Exported APIs */ -/* - * Handle cache congruency of kernel and userspace mappings of page when kernel - * writes-to/reads-from - * - * The idea is to defer flushing of kernel mapping after a WRITE, possible if: - * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent - * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) - * -In SMP, if hardware caches are coherent - * - * There's a corollary case, where kernel READs from a userspace mapped page. - * If the U-mapping is not congruent to K-mapping, former needs flushing. - */ void flush_dcache_folio(struct folio *folio) { - struct address_space *mapping; - - if (!cache_is_vipt_aliasing()) { - clear_bit(PG_dc_clean, &folio->flags); - return; - } - - /* don't handle anon pages here */ - mapping = folio_flush_mapping(folio); - if (!mapping) - return; - - /* - * pagecache page, file not yet mapped to userspace - * Make a note that K-mapping is dirty - */ - if (!mapping_mapped(mapping)) { - clear_bit(PG_dc_clean, &folio->flags); - } else if (folio_mapped(folio)) { - /* kernel reading from page with U-mapping */ - phys_addr_t paddr = (unsigned long)folio_address(folio); - unsigned long vaddr = folio_pos(folio); - - /* - * vaddr is not actually the virtual address, but is - * congruent to every user mapping. - */ - if (addr_not_cache_congruent(paddr, vaddr)) - __flush_dcache_pages(paddr, vaddr, - folio_nr_pages(folio)); - } + clear_bit(PG_dc_clean, &folio->flags); + return; } EXPORT_SYMBOL(flush_dcache_folio); @@ -921,44 +879,6 @@ noinline void flush_cache_all(void) } -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - -void flush_cache_mm(struct mm_struct *mm) -{ - flush_cache_all(); -} - -void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, - unsigned long pfn) -{ - phys_addr_t paddr = pfn << PAGE_SHIFT; - - u_vaddr &= PAGE_MASK; - - __flush_dcache_pages(paddr, u_vaddr, 1); - - if (vma->vm_flags & VM_EXEC) - __inv_icache_pages(paddr, u_vaddr, 1); -} - -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_cache_all(); -} - -void flush_anon_page(struct vm_area_struct *vma, struct page *page, - unsigned long u_vaddr) -{ - /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1); - __flush_dcache_pages((phys_addr_t)page_address(page), - (phys_addr_t)page_address(page), 1); - -} - -#endif - void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { @@ -966,46 +886,11 @@ void copy_user_highpage(struct page *to, struct page *from, struct folio *dst = page_folio(to); void *kfrom = kmap_atomic(from); void *kto = kmap_atomic(to); - int clean_src_k_mappings = 0; - - /* - * If SRC page was already mapped in userspace AND it's U-mapping is - * not congruent with K-mapping, sync former to physical page so that - * K-mapping in memcpy below, sees the right data - * - * Note that while @u_vaddr refers to DST page's userspace vaddr, it is - * equally valid for SRC page as well - * - * For !VIPT cache, all of this gets compiled out as - * addr_not_cache_congruent() is 0 - */ - if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1); - clean_src_k_mappings = 1; - } copy_page(kto, kfrom); - /* - * Mark DST page K-mapping as dirty for a later finalization by - * update_mmu_cache(). Although the finalization could have been done - * here as well (given that both vaddr/paddr are available). - * But update_mmu_cache() already has code to do that for other - * non copied user pages (e.g. read faults which wire in pagecache page - * directly). - */ clear_bit(PG_dc_clean, &dst->flags); - - /* - * if SRC was already usermapped and non-congruent to kernel mapping - * sync the kernel mapping back to physical page - */ - if (clean_src_k_mappings) { - __flush_dcache_pages((unsigned long)kfrom, - (unsigned long)kfrom, 1); - } else { - clear_bit(PG_dc_clean, &src->flags); - } + clear_bit(PG_dc_clean, &src->flags); kunmap_atomic(kto); kunmap_atomic(kfrom); @@ -1140,17 +1025,8 @@ static noinline void __init arc_cache_init_master(void) dc->line_len, L1_CACHE_BYTES); /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ - if (is_isa_arcompact()) { - int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->colors > 1) { - if (!handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - if (CACHE_COLORS_NUM != dc->colors) - panic("CACHE_COLORS_NUM not optimized for config\n"); - } else if (handled && dc->colors == 1) { - panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - } + if (is_isa_arcompact() && dc->colors > 1) { + panic("Aliasing VIPT cache not supported\n"); } } diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index fce5fa2b4f52..3c1c7ae73292 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -14,10 +14,6 @@ #include <asm/cacheflush.h> -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ - (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) - /* * Ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. @@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - int do_align = 0; - int aliasing = cache_is_vipt_aliasing(); struct vm_unmapped_area_info info; /* - * We only need to do colour alignment if D cache aliases. - */ - if (aliasing) - do_align = filp || (flags & MAP_SHARED); - - /* * We enforce the MAP_FIXED case. */ if (flags & MAP_FIXED) { - if (aliasing && flags & MAP_SHARED && + if (flags & MAP_SHARED && (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; @@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return -ENOMEM; if (addr) { - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && @@ -70,7 +55,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; - info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index e536b2dcd4b0..ad702b49aeb3 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -478,21 +478,15 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, create_tlb(vma, vaddr, ptep); - if (page == ZERO_PAGE(0)) { + if (page == ZERO_PAGE(0)) return; - } /* - * Exec page : Independent of aliasing/page-color considerations, - * since icache doesn't snoop dcache on ARC, any dirty - * K-mapping of a code page needs to be wback+inv so that - * icache fetch by userspace sees code correctly. - * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it - * so userspace sees the right data. - * (Avoids the flush for Non-exec + congruent mapping case) + * For executable pages, since icache doesn't snoop dcache, any + * dirty K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. */ - if ((vma->vm_flags & VM_EXEC) || - addr_not_cache_congruent(paddr, vaddr)) { + if (vma->vm_flags & VM_EXEC) { struct folio *folio = page_folio(page); int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags); if (dirty) { diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi index 1a2cd5baf402..5b9e01a8aa5d 100644 --- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi +++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi @@ -359,6 +359,7 @@ <SYSC_IDLE_NO>, <SYSC_IDLE_SMART>, <SYSC_IDLE_SMART_WKUP>; + ti,sysc-delay-us = <2>; clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi index 3f3e52e3b375..6509c742fb58 100644 --- a/arch/arm/boot/dts/ti/omap/dra7.dtsi +++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi @@ -147,7 +147,7 @@ l3-noc@44000000 { compatible = "ti,dra7-l3-noc"; - reg = <0x44000000 0x1000>, + reg = <0x44000000 0x1000000>, <0x45000000 0x1000>; interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 98999aa8cc0c..7f387706368a 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -793,11 +793,16 @@ void __init omap_soc_device_init(void) soc_dev_attr->machine = soc_name; soc_dev_attr->family = omap_get_family(); + if (!soc_dev_attr->family) { + kfree(soc_dev_attr); + return; + } soc_dev_attr->revision = soc_rev; soc_dev_attr->custom_attr_group = omap_soc_groups[0]; soc_dev = soc_device_register(soc_dev_attr); if (IS_ERR(soc_dev)) { + kfree(soc_dev_attr->family); kfree(soc_dev_attr); return; } diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi index 15290e6892fc..fc7315b94406 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi @@ -68,10 +68,7 @@ &emac0 { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; - allwinner,rx-delay-ps = <3100>; - allwinner,tx-delay-ps = <700>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts index d83852e72f06..b5d713926a34 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts @@ -13,6 +13,9 @@ }; &emac0 { + allwinner,rx-delay-ps = <3100>; + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii"; phy-supply = <®_dcdce>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts index 00fe28caac93..b3b1b8692125 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts @@ -13,6 +13,8 @@ }; &emac0 { + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii-rxid"; phy-supply = <®_dldo1>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts index 70b465f7c6a7..00ac59a873e8 100644 --- a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts +++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts @@ -238,6 +238,7 @@ mt6360: pmic@34 { compatible = "mediatek,mt6360"; reg = <0x34>; + interrupt-parent = <&pio>; interrupts = <128 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; interrupt-controller; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b19a8aee684c..79ce70fbb751 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -834,6 +834,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d977713ec0ba..abb57bc54305 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -44,9 +44,6 @@ return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -81,6 +78,5 @@ } asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); -#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 33795e4a5bd6..9c333d133c30 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -304,7 +304,6 @@ CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_BPF=m CONFIG_OPENVSWITCH=m diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4fcc168f0732..3c808c680370 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h index f5a8b2defa4b..3b0b64f0a353 100644 --- a/arch/m68k/include/asm/kexec.h +++ b/arch/m68k/include/asm/kexec.h @@ -2,7 +2,7 @@ #ifndef _ASM_M68K_KEXEC_H #define _ASM_M68K_KEXEC_H -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) @@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* _ASM_M68K_KEXEC_H */ diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 01fb69a5095f..f335bf3268a1 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o obj-$(CONFIG_UBOOT) += uboot.o diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 33c09688210f..08ea2cde1eb5 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index dc49b09d492b..e22e8b825903 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -173,7 +173,6 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 6f8046024557..4390d30206d9 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -202,7 +202,6 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 16a91eeff67f..d63d8be8cb50 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -206,7 +206,6 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 264aba29ea4f..338bb6544a93 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -203,7 +203,6 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 02ec6c1a5116..517f1b060bf4 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -96,7 +96,6 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_PEDIT=m CONFIG_HAMRADIO=y CONFIG_MTD=y diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index d6d5fa5cc31d..69e579e41e66 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, prepare_frametrace(newregs); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct kimage; extern unsigned long kexec_args[4]; extern int (*_machine_kexec_prepare)(struct kimage *); diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 5719ff49eff1..0c59e168f800 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -35,7 +35,7 @@ struct plat_smp_ops { void (*cpu_die)(unsigned int cpu); void (*cleanup_dead_cpu)(unsigned cpu); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_nonboot_cpu)(void); #endif }; diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index a40d8c0e4b87..901bc61fa7ae 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu) extern void __noreturn play_dead(void); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static inline void kexec_nonboot_cpu(void) { extern const struct plat_smp_ops *mp_ops; /* private */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 853a43ee4b44..ecf3278a32f7 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index c074ecce3fbf..b3dbf9ecb0d6 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index dd55d59b88db..f6c37d407f36 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -392,7 +392,7 @@ static void cps_smp_finish(void) local_irq_enable(); } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) enum cpu_death { CPU_DEATH_HALT, @@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void cps_kexec_nonboot_cpu(void) { @@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void) cps_shutdown_this_cpu(CPU_DEATH_POWER); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ +#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */ #ifdef CONFIG_HOTPLUG_CPU @@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = { .cpu_die = cps_cpu_die, .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, #endif }; diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index e420800043b0..e01c8d4a805a 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -53,7 +53,7 @@ static void loongson_halt(void) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* 0X80000000~0X80200000 is safe */ #define MAX_ARGS 64 @@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void) _machine_halt = loongson_halt; pm_power_off = loongson_poweroff; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); if (WARN_ON(!kexec_argv)) return -ENOMEM; diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e015a26a40f7..498bdc1bb0ed 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = { .cpu_disable = loongson3_cpu_disable, .cpu_die = loongson3_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index f279703425d4..66c7b28d7450 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -274,7 +274,6 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b1f25bac280b..71d52a670d95 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - if (migration_in_progress) + if (migration_in_progress) { rc = -EBUSY; - else + } else { rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + mutex_unlock(&vas_pseries_mutex); if (rc) goto out; @@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, goto out_free; txwin->win_type = cop_feat_caps->win_type; - mutex_lock(&vas_pseries_mutex); + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So possible + * that migration_in_progress is set before modify HCALL which + * may cause some windows are still open when the hypervisor + * initiates the migration. + * So checks the migration_in_progress flag again and close all + * open windows. + * * Possible to lose the acquired credit with DLPAR core * removal after the window is opened. So if there are any * closed windows (means with lost credits), do not give new @@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * after the existing windows are reopened when credits are * available. */ - if (!caps->nr_close_wins) { + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { list_add(&txwin->win_list, &caps->list); caps->nr_open_windows++; + caps->nr_open_wins_progress--; mutex_unlock(&vas_pseries_mutex); vas_user_win_add_mm_context(&txwin->vas_win.task_ref); return &txwin->vas_win; @@ -433,6 +448,12 @@ out_free: */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); out: atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); @@ -937,14 +958,14 @@ int vas_migration_handler(int action) struct vas_caps *vcaps; int i, rc = 0; + pr_info("VAS migration event %d\n", action); + /* * NX-GZIP is not enabled. Nothing to do for migration. */ if (!copypaste_feat) return rc; - mutex_lock(&vas_pseries_mutex); - if (action == VAS_SUSPEND) migration_in_progress = true; else @@ -990,12 +1011,27 @@ int vas_migration_handler(int action) switch (action) { case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. + */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); break; case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); atomic_set(&caps->nr_total_credits, new_nr_creds); rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); break; default: /* should not happen */ @@ -1011,8 +1047,9 @@ int vas_migration_handler(int action) goto out; } + pr_info("VAS migration event (%d) successful\n", action); + out: - mutex_unlock(&vas_pseries_mutex); return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 7115043ec488..45567cd13178 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -91,6 +91,8 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ int nr_open_windows; /* Number of successful open windows */ u8 feat; /* Feature type */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..24c1799e2ec4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. config ARCH_SUPPORTS_KEXEC - def_bool MMU + def_bool y config ARCH_SELECTS_KEXEC def_bool y @@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC select HOTPLUG_CPU if SMP config ARCH_SUPPORTS_KEXEC_FILE - def_bool 64BIT && MMU + def_bool 64BIT config ARCH_SELECTS_KEXEC_FILE def_bool y diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 294044429e8e..ab00235b018f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 1d7942c8a6cb..eeec04b7dae6 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -46,9 +46,6 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -82,6 +79,4 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers); - #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 55f1d7856b54..8706736fd4e2 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -5,18 +5,20 @@ void arch_crash_save_vmcoreinfo(void) { - VMCOREINFO_NUMBER(VA_BITS); VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); #ifdef CONFIG_64BIT vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); #endif +#endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..85490d9373fc 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,8 +44,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -76,7 +75,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -93,6 +91,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLUB_STATS=y # CONFIG_COMPAT_BRK is not set @@ -374,7 +373,6 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -619,6 +617,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=y +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -691,7 +692,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y @@ -834,7 +834,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 1b8150e50f6a..fb690fbbf54b 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -42,8 +42,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -71,7 +70,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -88,6 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y @@ -364,7 +363,6 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -605,6 +603,9 @@ CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -677,7 +678,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index b831083b4edd..47028450eee1 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -9,8 +9,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_CRASH_DUMP=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index b714ed0ef688..9acf48e53a87 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc) #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) #define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) +#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW) struct kernel_fpu; diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 9286430fe729..35c1d1b860d8 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -63,10 +63,6 @@ cond_syscall(__s390x_sys_##name); \ cond_syscall(__s390_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) - #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ long __s390_compat_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ @@ -85,15 +81,11 @@ /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ cond_syscall(__s390_compat_sys_##name) -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, name, ...) \ long __s390_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ @@ -124,9 +116,6 @@ #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, fullname, name, ...) #endif /* CONFIG_COMPAT */ diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 871092753591..c1032559ecd4 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -138,7 +138,6 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_PEDIT=m CONFIG_FW_LOADER=m CONFIG_CONNECTOR=m diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h index 927d80ba2332..76631714673c 100644 --- a/arch/sh/include/asm/kexec.h +++ b/arch/sh/include/asm/kexec.h @@ -28,7 +28,7 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_SH -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* arch/sh/kernel/machine_kexec.c */ void reserve_crashkernel(void); @@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #else static inline void reserve_crashkernel(void) { } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* __ASM_SH_KEXEC_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 69cd9ac4b2ab..2d7e70537de0 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_IO_TRAPPED) += io_trapped.o diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index e8eeedc9b182..1de006b1c339 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -63,7 +63,7 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -88,7 +88,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 3d80515298d2..d3175f09b3aa 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, request_resource(res, &code_resource); request_resource(res, &data_resource); request_resource(res, &bss_resource); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE request_resource(res, &crashk_res); #endif diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 55c98fdd67d2..18d15d1ce87d 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void) { unsigned long addr = 0; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE char val[MAX_ADDR_LEN] = { }; int ret; diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index fd2669b1cb2d..21f9407be5d3 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -86,9 +86,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); return sys_ni_syscall(); \ } -#define __SYS_NI(abi, name) \ - SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers); - #ifdef CONFIG_X86_64 #define __X64_SYS_STUB0(name) \ __SYS_STUB0(x64, sys_##name) @@ -100,13 +97,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X64_COND_SYSCALL(name) \ __COND_SYSCALL(x64, sys_##name) -#define __X64_SYS_NI(name) \ - __SYS_NI(x64, sys_##name) #else /* CONFIG_X86_64 */ #define __X64_SYS_STUB0(name) #define __X64_SYS_STUBx(x, name, ...) #define __X64_COND_SYSCALL(name) -#define __X64_SYS_NI(name) #endif /* CONFIG_X86_64 */ #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) @@ -120,13 +114,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, sys_##name) -#define __IA32_SYS_NI(name) \ - __SYS_NI(ia32, sys_##name) #else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #define __IA32_SYS_STUB0(name) #define __IA32_SYS_STUBx(x, name, ...) #define __IA32_COND_SYSCALL(name) -#define __IA32_SYS_NI(name) #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #ifdef CONFIG_IA32_EMULATION @@ -135,8 +126,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the * ia32 regs in the proper order for shared or "common" syscalls. As some * syscalls may not be implemented, we need to expand COND_SYSCALL in - * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this - * case as well. + * kernel/sys_ni.c to cover this case as well. */ #define __IA32_COMPAT_SYS_STUB0(name) \ __SYS_STUB0(ia32, compat_sys_##name) @@ -148,14 +138,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, compat_sys_##name) -#define __IA32_COMPAT_SYS_NI(name) \ - __SYS_NI(ia32, compat_sys_##name) - #else /* CONFIG_IA32_EMULATION */ #define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) #define __IA32_COMPAT_COND_SYSCALL(name) -#define __IA32_COMPAT_SYS_NI(name) #endif /* CONFIG_IA32_EMULATION */ @@ -175,13 +161,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(x64, compat_sys_##name) -#define __X32_COMPAT_SYS_NI(name) \ - __SYS_NI(x64, compat_sys_##name) #else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) -#define __X32_COMPAT_SYS_NI(name) #endif /* CONFIG_X86_X32_ABI */ @@ -212,17 +195,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ __IA32_COMPAT_COND_SYSCALL(name) \ __X32_COMPAT_COND_SYSCALL(name) -#define COMPAT_SYS_NI(name) \ - __IA32_COMPAT_SYS_NI(name) \ - __X32_COMPAT_SYS_NI(name) - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -243,8 +221,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not * hurt, we only need to re-define it here to keep the naming congruent to - * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() - * macros to work correctly. + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro + * to work correctly. */ #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ @@ -257,10 +235,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); __X64_COND_SYSCALL(name) \ __IA32_COND_SYSCALL(name) -#define SYS_NI(name) \ - __X64_SYS_NI(name) \ - __IA32_SYS_NI(name) - /* * For VSYSCALLS, we need to declare these three syscalls with the new diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 4ccf1994b97a..d530384f8d60 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -53,10 +53,12 @@ #define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ - (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ +#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) +#define BUTTRESS_ALL_IRQ_MASK (BUTTRESS_IRQ_MASK | \ + (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE))) + #define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) #define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) @@ -74,8 +76,12 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) vdev->wa.clear_runtime_mem = false; vdev->wa.d3hot_after_power_off = true; - if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4) + REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK); + if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) { + /* Writing 1s does not clear the interrupt status register */ vdev->wa.interrupt_clear_with_0 = true; + REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); + } IVPU_PRINT_WA(punit_disabled); IVPU_PRINT_WA(clear_runtime_mem); diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 2462796a512a..cdc5c08824a0 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -535,6 +535,8 @@ static int btintel_version_info_tlv(struct hci_dev *hdev, bt_dev_info(hdev, "%s timestamp %u.%u buildtype %u build %u", variant, 2000 + (version->timestamp >> 8), version->timestamp & 0xff, version->build_type, version->build_num); + if (version->img_type == 0x03) + bt_dev_info(hdev, "Firmware SHA1: 0x%8.8x", version->git_sha1); return 0; } @@ -630,6 +632,9 @@ static int btintel_parse_version_tlv(struct hci_dev *hdev, memcpy(&version->otp_bd_addr, tlv->val, sizeof(bdaddr_t)); break; + case INTEL_TLV_GIT_SHA1: + version->git_sha1 = get_unaligned_le32(tlv->val); + break; default: /* Ignore rest of information */ break; diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index 3a2d5b4219dd..d19fcdb9ff0b 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -41,7 +41,8 @@ enum { INTEL_TLV_LIMITED_CCE, INTEL_TLV_SBE_TYPE, INTEL_TLV_OTP_BDADDR, - INTEL_TLV_UNLOCKED_STATE + INTEL_TLV_UNLOCKED_STATE, + INTEL_TLV_GIT_SHA1 }; struct intel_tlv { @@ -69,6 +70,7 @@ struct intel_version_tlv { u8 min_fw_build_yy; u8 limited_cce; u8 sbe_type; + u32 git_sha1; bdaddr_t otp_bd_addr; }; diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index 935feab815d9..203a000a84e3 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -336,7 +336,7 @@ mtk_stp_split(struct btmtkuart_dev *bdev, const unsigned char *data, int count, return data; } -static int btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) +static void btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) { struct btmtkuart_dev *bdev = hci_get_drvdata(hdev); const unsigned char *p_left = data, *p_h4; @@ -375,25 +375,20 @@ static int btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) bt_dev_err(bdev->hdev, "Frame reassembly failed (%d)", err); bdev->rx_skb = NULL; - return err; + return; } sz_left -= sz_h4; p_left += sz_h4; } - - return 0; } static int btmtkuart_receive_buf(struct serdev_device *serdev, const u8 *data, size_t count) { struct btmtkuart_dev *bdev = serdev_device_get_drvdata(serdev); - int err; - err = btmtkuart_recv(bdev->hdev, data, count); - if (err < 0) - return err; + btmtkuart_recv(bdev->hdev, data, count); bdev->hdev->stat.byte_rx += count; diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index b7e66b7ac570..b7c56be078f8 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1276,11 +1276,9 @@ static int btnxpuart_receive_buf(struct serdev_device *serdev, const u8 *data, if (IS_ERR(nxpdev->rx_skb)) { int err = PTR_ERR(nxpdev->rx_skb); /* Safe to ignore out-of-sync bootloader signatures */ - if (is_fw_downloading(nxpdev)) - return count; - bt_dev_err(nxpdev->hdev, "Frame reassembly failed (%d)", err); - nxpdev->rx_skb = NULL; - return err; + if (!is_fw_downloading(nxpdev)) + bt_dev_err(nxpdev->hdev, "Frame reassembly failed (%d)", err); + return count; } if (!is_fw_downloading(nxpdev)) nxpdev->hdev->stat.byte_rx += count; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b8e9de887b5d..7835170b1d66 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -550,6 +550,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3571), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3572), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, /* Realtek Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x0bda, 0xe0, 0x01, 0x01), @@ -4629,6 +4631,10 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message) BT_DBG("intf %p", intf); + /* Don't suspend if there are connections */ + if (hci_conn_count(data->hdev)) + return -EBUSY; + if (data->suspend_count++) return 0; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 067e248e3599..94b8c406f0c0 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1815,6 +1815,24 @@ static void hci_coredump_qca(struct hci_dev *hdev) kfree_skb(skb); } +static int qca_get_data_path_id(struct hci_dev *hdev, __u8 *data_path_id) +{ + /* QCA uses 1 as non-HCI data path id for HFP */ + *data_path_id = 1; + return 0; +} + +static int qca_configure_hfp_offload(struct hci_dev *hdev) +{ + bt_dev_info(hdev, "HFP non-HCI data transport is supported"); + hdev->get_data_path_id = qca_get_data_path_id; + /* Do not need to send HCI_Configure_Data_Path to configure non-HCI + * data transport path for QCA controllers, so set below field as NULL. + */ + hdev->get_codec_config_data = NULL; + return 0; +} + static int qca_setup(struct hci_uart *hu) { struct hci_dev *hdev = hu->hdev; @@ -1969,6 +1987,10 @@ out: hu->hdev->set_bdaddr = qca_set_bdaddr_rome; else hu->hdev->set_bdaddr = qca_set_bdaddr; + + if (soc_type == QCA_QCA2066) + qca_configure_hfp_offload(hdev); + qca->fw_version = le16_to_cpu(ver.patch_ver); qca->controller_id = le16_to_cpu(ver.rom_ver); hci_devcd_register(hdev, hci_coredump_qca, qca_dmp_hdr, NULL); @@ -2039,6 +2061,7 @@ static const struct qca_device_data qca_soc_data_wcn3998 __maybe_unused = { static const struct qca_device_data qca_soc_data_qca2066 __maybe_unused = { .soc_type = QCA_QCA2066, .num_vregs = 0, + .capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES, }; static const struct qca_device_data qca_soc_data_qca6390 __maybe_unused = { diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f3892e9ce800..572d68d52965 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <asm/unaligned.h> +#include <linux/atomic.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/slab.h> @@ -44,6 +45,7 @@ struct vhci_data { bool wakeup; __u16 msft_opcode; bool aosp_capable; + atomic_t initialized; }; static int vhci_open_dev(struct hci_dev *hdev) @@ -75,11 +77,10 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); - mutex_lock(&data->open_mutex); skb_queue_tail(&data->readq, skb); - mutex_unlock(&data->open_mutex); - wake_up_interruptible(&data->read_wait); + if (atomic_read(&data->initialized)) + wake_up_interruptible(&data->read_wait); return 0; } @@ -464,7 +465,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) skb_put_u8(skb, 0xff); skb_put_u8(skb, opcode); put_unaligned_le16(hdev->id, skb_put(skb, 2)); - skb_queue_tail(&data->readq, skb); + skb_queue_head(&data->readq, skb); + atomic_inc(&data->initialized); wake_up_interruptible(&data->read_wait); return 0; diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index d57bc066dce6..9ed9239b1228 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2158,13 +2158,23 @@ static int sysc_reset(struct sysc *ddata) sysc_val = sysc_read_sysconfig(ddata); sysc_val |= sysc_mask; sysc_write(ddata, sysc_offset, sysc_val); - /* Flush posted write */ + + /* + * Some devices need a delay before reading registers + * after reset. Presumably a srst_udelay is not needed + * for devices that use a rstctrl register reset. + */ + if (ddata->cfg.srst_udelay) + fsleep(ddata->cfg.srst_udelay); + + /* + * Flush posted write. For devices needing srst_udelay + * this should trigger an interconnect error if the + * srst_udelay value is needed but not configured. + */ sysc_val = sysc_read_sysconfig(ddata); } - if (ddata->cfg.srst_udelay) - fsleep(ddata->cfg.srst_udelay); - if (ddata->post_reset_quirk) ddata->post_reset_quirk(ddata); diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index ad1acd9b7426..dbc3950c5960 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -767,6 +767,7 @@ config SM_CAMCC_8450 config SM_CAMCC_8550 tristate "SM8550 Camera Clock Controller" + depends on ARM64 || COMPILE_TEST select SM_GCC_8550 help Support for the camera clock controller on SM8550 devices. diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index aa53797dbfc1..75071e0cd321 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -138,7 +138,7 @@ PNAME(mux_pll_src_5plls_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3", "usb480 PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "gpll_div2", "usb480m" }; PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "gpll_div2" }; -PNAME(mux_aclk_peri_src_p) = { "gpll_peri", "cpll_peri", "gpll_div2_peri", "gpll_div3_peri" }; +PNAME(mux_clk_peri_src_p) = { "gpll", "cpll", "gpll_div2", "gpll_div3" }; PNAME(mux_mmc_src_p) = { "cpll", "gpll", "gpll_div2", "xin24m" }; PNAME(mux_clk_cif_out_src_p) = { "clk_cif_src", "xin24m" }; PNAME(mux_sclk_vop_src_p) = { "cpll", "gpll", "gpll_div2", "gpll_div3" }; @@ -275,23 +275,17 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { RK2928_CLKGATE_CON(0), 11, GFLAGS), /* PD_PERI */ - GATE(0, "gpll_peri", "gpll", CLK_IGNORE_UNUSED, + COMPOSITE(0, "clk_peri_src", mux_clk_peri_src_p, 0, + RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS, RK2928_CLKGATE_CON(2), 0, GFLAGS), - GATE(0, "cpll_peri", "cpll", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(2), 0, GFLAGS), - GATE(0, "gpll_div2_peri", "gpll_div2", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(2), 0, GFLAGS), - GATE(0, "gpll_div3_peri", "gpll_div3", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(2), 0, GFLAGS), - COMPOSITE_NOGATE(0, "aclk_peri_src", mux_aclk_peri_src_p, 0, - RK2928_CLKSEL_CON(10), 14, 2, MFLAGS, 0, 5, DFLAGS), - COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", 0, + + COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "clk_peri_src", 0, RK2928_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, RK2928_CLKGATE_CON(2), 3, GFLAGS), - COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", 0, + COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "clk_peri_src", 0, RK2928_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO, RK2928_CLKGATE_CON(2), 2, GFLAGS), - GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", 0, + GATE(ACLK_PERI, "aclk_peri", "clk_peri_src", 0, RK2928_CLKGATE_CON(2), 1, GFLAGS), GATE(SCLK_TIMER0, "sclk_timer0", "xin24m", 0, @@ -316,7 +310,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { GATE(SCLK_MIPI_24M, "clk_mipi_24m", "xin24m", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(2), 15, GFLAGS), - COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0, + COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0, RK2928_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, RK2928_CLKGATE_CON(2), 11, GFLAGS), @@ -490,7 +484,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { GATE(HCLK_I2S_2CH, "hclk_i2s_2ch", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS), GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 13, GFLAGS), GATE(HCLK_HOST2, "hclk_host2", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS), - GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(3), 13, GFLAGS), + GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 13, GFLAGS), GATE(0, "hclk_peri_ahb", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 14, GFLAGS), GATE(HCLK_SPDIF, "hclk_spdif", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 9, GFLAGS), GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK2928_CLKGATE_CON(10), 12, GFLAGS), diff --git a/drivers/clk/rockchip/clk-rk3568.c b/drivers/clk/rockchip/clk-rk3568.c index 16dabe2b9c47..db713e1526cd 100644 --- a/drivers/clk/rockchip/clk-rk3568.c +++ b/drivers/clk/rockchip/clk-rk3568.c @@ -72,6 +72,7 @@ static struct rockchip_pll_rate_table rk3568_pll_rates[] = { RK3036_PLL_RATE(408000000, 1, 68, 2, 2, 1, 0), RK3036_PLL_RATE(312000000, 1, 78, 6, 1, 1, 0), RK3036_PLL_RATE(297000000, 2, 99, 4, 1, 1, 0), + RK3036_PLL_RATE(292500000, 1, 195, 4, 4, 1, 0), RK3036_PLL_RATE(241500000, 2, 161, 4, 2, 1, 0), RK3036_PLL_RATE(216000000, 1, 72, 4, 2, 1, 0), RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0), diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 7f7b94f616a6..4028e8eeba82 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -59,9 +59,8 @@ static int cn_already_initialized; * both, or if both are zero then the group is looked up and sent there. */ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group, - gfp_t gfp_mask, - int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), - void *filter_data) + gfp_t gfp_mask, netlink_filter_fn filter, + void *filter_data) { struct cn_callback_entry *__cbq; unsigned int size; diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 1cc9be85ba4c..7d97790b893d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -363,10 +363,9 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) { resource_size_t base = -1; - down_read(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_dpa_rwsem); if (cxled->dpa_res) base = cxled->dpa_res->start; - up_read(&cxl_dpa_rwsem); return base; } @@ -839,6 +838,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->target_type = CXL_DECODER_HOSTONLYMEM; else cxld->target_type = CXL_DECODER_DEVMEM; + + guard(rwsem_write)(&cxl_region_rwsem); if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index fc5c2b414793..2f43d368ba07 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -227,10 +227,16 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (!port || !is_cxl_endpoint(port)) return -EINVAL; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ rc = cxl_get_poison_by_memdev(cxlmd); @@ -239,6 +245,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) rc = cxl_get_poison_by_endpoint(port); } up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } @@ -324,10 +331,16 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) goto out; @@ -355,6 +368,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT); out: up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } @@ -372,10 +386,16 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - rc = down_read_interruptible(&cxl_dpa_rwsem); + rc = down_read_interruptible(&cxl_region_rwsem); if (rc) return rc; + rc = down_read_interruptible(&cxl_dpa_rwsem); + if (rc) { + up_read(&cxl_region_rwsem); + return rc; + } + rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) goto out; @@ -412,6 +432,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR); out: up_read(&cxl_dpa_rwsem); + up_read(&cxl_region_rwsem); return rc; } diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index eff20e83d0a6..37e1652afbc7 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -620,7 +620,7 @@ void read_cdat_data(struct cxl_port *port) struct pci_dev *pdev = NULL; struct cxl_memdev *cxlmd; size_t cdat_length; - void *cdat_table; + void *cdat_table, *cdat_buf; int rc; if (is_cxl_memdev(uport)) { @@ -651,16 +651,15 @@ void read_cdat_data(struct cxl_port *port) return; } - cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32), - GFP_KERNEL); - if (!cdat_table) + cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL); + if (!cdat_buf) return; - rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); + rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length); if (rc) goto err; - cdat_table = cdat_table + sizeof(__le32); + cdat_table = cdat_buf + sizeof(__le32); if (cdat_checksum(cdat_table, cdat_length)) goto err; @@ -670,7 +669,7 @@ void read_cdat_data(struct cxl_port *port) err: /* Don't leave table data allocated on error */ - devm_kfree(dev, cdat_table); + devm_kfree(dev, cdat_buf); dev_err(dev, "Failed to read/validate CDAT.\n"); } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c index 7684c843e5a5..5d8e06b0ba6e 100644 --- a/drivers/cxl/core/pmu.c +++ b/drivers/cxl/core/pmu.c @@ -23,7 +23,7 @@ const struct device_type cxl_pmu_type = { static void remove_dev(void *dev) { - device_del(dev); + device_unregister(dev); } int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs, diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 38441634e4c6..b7c93bb18f6e 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -226,9 +226,9 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at char *buf) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - u64 base = cxl_dpa_resource_start(cxled); - return sysfs_emit(buf, "%#llx\n", base); + guard(rwsem_read)(&cxl_dpa_rwsem); + return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled)); } static DEVICE_ATTR_RO(dpa_resource); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 56e575c79bb4..3e817a6f94c6 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2467,10 +2467,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) struct cxl_poison_context ctx; int rc = 0; - rc = down_read_interruptible(&cxl_region_rwsem); - if (rc) - return rc; - ctx = (struct cxl_poison_context) { .port = port }; @@ -2480,7 +2476,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev), &ctx); - up_read(&cxl_region_rwsem); return rc; } diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index 6a3abe5b1790..b53f46245c37 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -828,6 +828,7 @@ void fsl_edma_free_chan_resources(struct dma_chan *chan) dma_pool_destroy(fsl_chan->tcd_pool); fsl_chan->tcd_pool = NULL; fsl_chan->is_sw = false; + fsl_chan->srcid = 0; } void fsl_edma_cleanup_vchan(struct dma_device *dmadev) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 4635e16d7705..238a69bd0d6f 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -396,9 +396,8 @@ static int fsl_edma3_attach_pd(struct platform_device *pdev, struct fsl_edma_eng link = device_link_add(dev, pd_chan, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); - if (IS_ERR(link)) { - dev_err(dev, "Failed to add device_link to %d: %ld\n", i, - PTR_ERR(link)); + if (!link) { + dev_err(dev, "Failed to add device_link to %d\n", i); return -EINVAL; } @@ -631,6 +630,8 @@ static int fsl_edma_suspend_late(struct device *dev) for (i = 0; i < fsl_edma->n_chans; i++) { fsl_chan = &fsl_edma->chans[i]; + if (fsl_edma->chan_masked & BIT(i)) + continue; spin_lock_irqsave(&fsl_chan->vchan.lock, flags); /* Make sure chan is idle or will force disable. */ if (unlikely(!fsl_chan->idle)) { @@ -655,13 +656,16 @@ static int fsl_edma_resume_early(struct device *dev) for (i = 0; i < fsl_edma->n_chans; i++) { fsl_chan = &fsl_edma->chans[i]; + if (fsl_edma->chan_masked & BIT(i)) + continue; fsl_chan->pm_state = RUNNING; edma_write_tcdreg(fsl_chan, 0, csr); if (fsl_chan->slave_id != 0) fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true); } - edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); + if (!(fsl_edma->drvdata->flags & FSL_EDMA_DRV_SPLIT_REG)) + edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr); return 0; } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 7b54a3939ea1..315c004f58e4 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -440,12 +440,14 @@ union wqcfg { /* * This macro calculates the offset into the GRPCFG register * idxd - struct idxd * - * n - wq id - * ofs - the index of the 32b dword for the config register + * n - group id + * ofs - the index of the 64b qword for the config register * - * The WQCFG register block is divided into groups per each wq. The n index - * allows us to move to the register group that's for that particular wq. - * Each register is 32bits. The ofs gives us the number of register to access. + * The GRPCFG register block is divided into three sub-registers, which + * are GRPWQCFG, GRPENGCFG and GRPFLGCFG. The n index allows us to move + * to the register block that contains the three sub-registers. + * Each register block is 64bits. And the ofs gives us the offset + * within the GRPWQCFG register to access. */ #define GRPWQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->grpcfg_offset +\ (n) * GRPCFG_SIZE + sizeof(u64) * (ofs)) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index c01db23e3333..3f922518e3a5 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -183,13 +183,6 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) portal = idxd_wq_portal_addr(wq); /* - * The wmb() flushes writes to coherent DMA data before - * possibly triggering a DMA read. The wmb() is necessary - * even on UP because the recipient is a device. - */ - wmb(); - - /* * Pending the descriptor to the lockless list for the irq_entry * that we designated the descriptor to. */ @@ -199,6 +192,13 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) llist_add(&desc->llnode, &ie->pending_llist); } + /* + * The wmb() flushes writes to coherent DMA data before + * possibly triggering a DMA read. The wmb() is necessary + * even on UP because the recipient is a device. + */ + wmb(); + if (wq_dedicated(wq)) { iosubmit_cmds512(portal, desc->hw, 1); } else { diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 72d83cd9ed6b..90857d08a1a7 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1246,8 +1246,8 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( enum dma_slave_buswidth max_width; struct stm32_dma_desc *desc; size_t xfer_count, offset; - u32 num_sgs, best_burst, dma_burst, threshold; - int i; + u32 num_sgs, best_burst, threshold; + int dma_burst, i; num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); desc = kzalloc(struct_size(desc, sg_req, num_sgs), GFP_NOWAIT); @@ -1266,6 +1266,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST, threshold, max_width); dma_burst = stm32_dma_get_burst(chan, best_burst); + if (dma_burst < 0) { + kfree(desc); + return NULL; + } stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); desc->sg_req[i].chan_reg.dma_scr = diff --git a/drivers/dma/ti/k3-psil-am62.c b/drivers/dma/ti/k3-psil-am62.c index 2b6fd6e37c61..1272b1541f61 100644 --- a/drivers/dma/ti/k3-psil-am62.c +++ b/drivers/dma/ti/k3-psil-am62.c @@ -74,7 +74,9 @@ static struct psil_ep am62_src_ep_map[] = { PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), PSIL_PDMA_XY_PKT(0x4302), PSIL_PDMA_XY_PKT(0x4303), PSIL_PDMA_XY_PKT(0x4304), @@ -85,8 +87,6 @@ static struct psil_ep am62_src_ep_map[] = { PSIL_PDMA_XY_PKT(0x4309), PSIL_PDMA_XY_PKT(0x430a), PSIL_PDMA_XY_PKT(0x430b), - PSIL_PDMA_XY_PKT(0x430c), - PSIL_PDMA_XY_PKT(0x430d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0x4400), PSIL_PDMA_XY_PKT(0x4401), @@ -141,7 +141,9 @@ static struct psil_ep am62_dst_ep_map[] = { /* SAUL */ PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), PSIL_PDMA_XY_PKT(0xc302), PSIL_PDMA_XY_PKT(0xc303), PSIL_PDMA_XY_PKT(0xc304), @@ -152,8 +154,6 @@ static struct psil_ep am62_dst_ep_map[] = { PSIL_PDMA_XY_PKT(0xc309), PSIL_PDMA_XY_PKT(0xc30a), PSIL_PDMA_XY_PKT(0xc30b), - PSIL_PDMA_XY_PKT(0xc30c), - PSIL_PDMA_XY_PKT(0xc30d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0xc400), PSIL_PDMA_XY_PKT(0xc401), diff --git a/drivers/dma/ti/k3-psil-am62a.c b/drivers/dma/ti/k3-psil-am62a.c index ca9d71f91422..4cf9123b0e93 100644 --- a/drivers/dma/ti/k3-psil-am62a.c +++ b/drivers/dma/ti/k3-psil-am62a.c @@ -84,7 +84,9 @@ static struct psil_ep am62a_src_ep_map[] = { PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0x4300), + PSIL_PDMA_XY_PKT(0x4301), PSIL_PDMA_XY_PKT(0x4302), PSIL_PDMA_XY_PKT(0x4303), PSIL_PDMA_XY_PKT(0x4304), @@ -95,8 +97,6 @@ static struct psil_ep am62a_src_ep_map[] = { PSIL_PDMA_XY_PKT(0x4309), PSIL_PDMA_XY_PKT(0x430a), PSIL_PDMA_XY_PKT(0x430b), - PSIL_PDMA_XY_PKT(0x430c), - PSIL_PDMA_XY_PKT(0x430d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0x4400), PSIL_PDMA_XY_PKT(0x4401), @@ -151,7 +151,9 @@ static struct psil_ep am62a_dst_ep_map[] = { /* SAUL */ PSIL_SAUL(0xf500, 27, 83, 8, 83, 1), PSIL_SAUL(0xf501, 28, 91, 8, 91, 1), - /* PDMA_MAIN0 - SPI0-3 */ + /* PDMA_MAIN0 - SPI0-2 */ + PSIL_PDMA_XY_PKT(0xc300), + PSIL_PDMA_XY_PKT(0xc301), PSIL_PDMA_XY_PKT(0xc302), PSIL_PDMA_XY_PKT(0xc303), PSIL_PDMA_XY_PKT(0xc304), @@ -162,8 +164,6 @@ static struct psil_ep am62a_dst_ep_map[] = { PSIL_PDMA_XY_PKT(0xc309), PSIL_PDMA_XY_PKT(0xc30a), PSIL_PDMA_XY_PKT(0xc30b), - PSIL_PDMA_XY_PKT(0xc30c), - PSIL_PDMA_XY_PKT(0xc30d), /* PDMA_MAIN1 - UART0-6 */ PSIL_PDMA_XY_PKT(0xc400), PSIL_PDMA_XY_PKT(0xc401), diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 87e730dfefa0..8625de20fc71 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -966,10 +966,10 @@ static int mc_probe(struct platform_device *pdev) edac_mc_id = emif_get_id(pdev->dev.of_node); regval = readl(ddrmc_baseaddr + XDDR_REG_CONFIG0_OFFSET); - num_chans = FIELD_PREP(XDDR_REG_CONFIG0_NUM_CHANS_MASK, regval); + num_chans = FIELD_GET(XDDR_REG_CONFIG0_NUM_CHANS_MASK, regval); num_chans++; - num_csrows = FIELD_PREP(XDDR_REG_CONFIG0_NUM_RANKS_MASK, regval); + num_csrows = FIELD_GET(XDDR_REG_CONFIG0_NUM_RANKS_MASK, regval); num_csrows *= 2; if (!num_csrows) num_csrows = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1f64d8cbb14d..8dee52ce26d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4516,8 +4516,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - amdgpu_ttm_set_buffer_funcs_status(adev, false); - amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d79b4ca1ecfc..5ad03f2afdb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1343,6 +1343,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) abo = ttm_to_amdgpu_bo(bo); + WARN_ON(abo->vm_bo); + if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a2287bb25223..a160265ddc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 9df011323d4b..6ede85b28cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -155,13 +155,6 @@ static int jpeg_v4_0_5_hw_init(void *handle) struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); - - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); if (r) return r; @@ -336,6 +329,14 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, true); + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + /* disable power gating */ r = jpeg_v4_0_5_disable_static_power_gating(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 45377a175250..8d5d86675a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -813,12 +813,12 @@ static int sdma_v2_4_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = sdma_v2_4_init_microcode(adev); if (r) return r; - adev->sdma.num_instances = SDMA_MAX_INSTANCE; - sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); sdma_v2_4_set_vm_pte_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 83c240f741b5..0058f3f7cf6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b452796fc6d3..c8c00c2a5224 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5182,6 +5182,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; + if (new_plane_state->rotation != DRM_MODE_ROTATE_0) + goto ffu; + num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 9649934ea186..e2a3aa8812df 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -465,6 +465,7 @@ struct dc_cursor_mi_param { struct fixed31_32 v_scale_ratio; enum dc_rotation_angle rotation; bool mirror; + struct dc_stream_state *stream; }; /* IPP related types */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 139cf31d2e45..89c3bf0fe0c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1077,8 +1077,16 @@ void hubp2_cursor_set_position( if (src_y_offset < 0) src_y_offset = 0; /* Save necessary cursor info x, y position. w, h is saved in attribute func. */ - hubp->cur_rect.x = src_x_offset + param->viewport.x; - hubp->cur_rect.y = src_y_offset + param->viewport.y; + if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 && + param->rotation != ROTATION_ANGLE_0) { + hubp->cur_rect.x = 0; + hubp->cur_rect.y = 0; + hubp->cur_rect.w = param->stream->timing.h_addressable; + hubp->cur_rect.h = param->stream->timing.v_addressable; + } else { + hubp->cur_rect.x = src_x_offset + param->viewport.x; + hubp->cur_rect.y = src_y_offset + param->viewport.y; + } } void hubp2_clk_cntl(struct hubp *hubp, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 39cf1ae3a3e1..f154a3eb1d1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 1, @@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 2, @@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 3, @@ -151,7 +151,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 4, @@ -160,7 +160,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, }, .num_states = 5, @@ -348,6 +348,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clock_limits[i].socclk_mhz; dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = + clock_limits[i].dtbclk_mhz; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = @@ -360,6 +362,8 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, clk_table->num_entries; dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = + clk_table->num_entries; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 4d1336e5afc2..180f8a98a361 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, + mode_lib->ms.soc.hostvm_min_page_size_kbytes, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index fa8fe5bf7e57..db06a5b749b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -423,8 +423,9 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) { - p->in_states->state_array[i].dtbclk_mhz = - dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; + if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0) + p->in_states->state_array[i].dtbclk_mhz = + dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz; } for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 2b8b8366538e..cdb903116eb7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3417,7 +3417,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, .rotation = pipe_ctx->plane_state->rotation, - .mirror = pipe_ctx->plane_state->horizontal_mirror + .mirror = pipe_ctx->plane_state->horizontal_mirror, + .stream = pipe_ctx->stream, }; bool pipe_split_on = false; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 996e4ee99023..e5cfaaef70b3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -287,8 +287,8 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if > 5000, it might be wrong readback - if (default_backlight > 5000000) + // if < 1 nits or > 5000, it might be wrong readback + if (default_backlight < 1000 || default_backlight > 5000000) default_backlight = 150000; return edp_set_backlight_level_nits(link, true, diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index a522a7c02911..1675314a3ff2 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index ca2ece24e1e0..49028dde0f87 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2198,10 +2198,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + } else if (DEVICE_ATTR_IS(pp_mclk_od)) { if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; - } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + } else if (DEVICE_ATTR_IS(pp_sclk_od)) { if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 2ed2585ded37..6899b3dc1f12 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -236,7 +236,7 @@ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && - rcu_access_pointer(file_priv->pid) == task_pid(current)) + rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index df9bf3c9206e..cb90e70d85e8 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -715,8 +715,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; - int ret; - int i; + int ret, i, num_connectors = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -871,6 +870,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, connector->name); connector_set[i] = connector; + num_connectors++; } } @@ -879,7 +879,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, set.y = crtc_req->y; set.mode = mode; set.connectors = connector_set; - set.num_connectors = crtc_req->count_connectors; + set.num_connectors = num_connectors; set.fb = fb; if (drm_drv_uses_atomic_modeset(dev)) @@ -892,7 +892,7 @@ out: drm_framebuffer_put(fb); if (connector_set) { - for (i = 0; i < crtc_req->count_connectors; i++) { + for (i = 0; i < num_connectors; i++) { if (connector_set[i]) drm_connector_put(connector_set[i]); } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 39db08f803ea..3b4065099872 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2309,7 +2309,8 @@ int drm_edid_override_connector_update(struct drm_connector *connector) override = drm_edid_override_get(connector); if (override) { - num_modes = drm_edid_connector_update(connector, override); + if (drm_edid_connector_update(connector, override) == 0) + num_modes = drm_edid_connector_add_modes(connector); drm_edid_free(override); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index dbc1b66c8ee4..1abfafbbfa75 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) { - u8 link_config[2]; + u8 lane_count = crtc_state->lane_count; - /* Write the link configuration data */ - link_config[0] = link_bw; - link_config[1] = crtc_state->lane_count; if (crtc_state->enhanced_framing) - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); + lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + + if (link_bw) { + /* DP and eDP v1.3 and earlier link bw set method. */ + u8 link_config[] = { link_bw, lane_count }; - /* eDP 1.4 rate select method. */ - if (!link_bw) - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, - &rate_select, 1); + drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, + ARRAY_SIZE(link_config)); + } else { + /* + * eDP v1.4 and later link rate set method. + * + * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if + * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET. + * + * eDP v1.5 sinks allow choosing either, and the last choice + * shall be active. + */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select); + } } /* diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 19b35ece31f1..646f367a13f5 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1374,7 +1374,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane, struct drm_i915_private *i915 = to_i915(fb->base.dev); unsigned int stride_tiles; - if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) + if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) && + src_stride_tiles < dst_stride_tiles) stride_tiles = src_stride_tiles; else stride_tiles = dst_stride_tiles; @@ -1501,8 +1502,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p size += remap_info->size; } else { - unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane, - remap_info->width); + unsigned int dst_stride; + + /* + * The hardware automagically calculates the CCS AUX surface + * stride from the main surface stride so can't really remap a + * smaller subset (unless we'd remap in whole AUX page units). + */ + if (intel_fb_needs_pot_stride_remap(fb) && + intel_fb_is_ccs_modifier(fb->base.modifier)) + dst_stride = remap_info->src_stride; + else + dst_stride = remap_info->width; + + dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride); assign_chk_ovf(i915, remap_info->dst_stride, dst_stride); color_plane_info->mapping_stride = dst_stride * diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 1e7c97243fcf..8a934bada624 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; - struct intel_plane_state *plane_state = NULL; struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->uapi.state; @@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, /* walkthrough scaler_users bits and start assigning scalers */ for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) { + struct intel_plane_state *plane_state = NULL; int *scaler_id; const char *name; int idx, ret; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index d5ed904f355d..6801f8b95c53 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1293,7 +1293,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) if (msg) drm_notice(&engine->i915->drm, "Resetting %s for %s\n", engine->name, msg); - atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_increase_reset_engine_count(&engine->i915->gpu_error, engine); ret = intel_gt_reset_engine(engine); if (ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index d37698bd6b91..17df71117cc7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -5001,7 +5001,8 @@ static void capture_error_state(struct intel_guc *guc, if (match) { intel_engine_set_hung_context(e, ce); engine_mask |= e->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, + e); } } @@ -5013,7 +5014,7 @@ static void capture_error_state(struct intel_guc *guc, } else { intel_engine_set_hung_context(ce->engine, ce); engine_mask = ce->engine->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); } with_intel_runtime_pm(&i915->runtime_pm, wakeref) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 9f5971f5e980..48f6c00402c4 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -16,6 +16,7 @@ #include "display/intel_display_device.h" #include "gt/intel_engine.h" +#include "gt/intel_engine_types.h" #include "gt/intel_gt_types.h" #include "gt/uc/intel_uc_fw.h" @@ -232,7 +233,7 @@ struct i915_gpu_error { atomic_t reset_count; /** Number of times an engine has been reset */ - atomic_t reset_engine_count[I915_NUM_ENGINES]; + atomic_t reset_engine_count[MAX_ENGINE_CLASS]; }; struct drm_i915_error_state_buf { @@ -255,7 +256,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, const struct intel_engine_cs *engine) { - return atomic_read(&error->reset_engine_count[engine->uabi_class]); + return atomic_read(&error->reset_engine_count[engine->class]); +} + +static inline void +i915_increase_reset_engine_count(struct i915_gpu_error *error, + const struct intel_engine_cs *engine) +{ + atomic_inc(&error->reset_engine_count[engine->class]); } #define CORE_DUMP_FLAG_NONE 0x0 diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 4ddc6d902752..7d41874a49c5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t, } for_each_engine(engine, gt, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); + t->reset_engine[i][id] = + i915_reset_engine_count(&i915->gpu_error, + engine); } t->reset_global = i915_reset_count(&i915->gpu_error); @@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t) for_each_gt(gt, i915, i) { for_each_engine(engine, gt, id) { - if (t->reset_engine[id] == + if (t->reset_engine[i][id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n", t->func, t->name, engine->name, i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); + t->reset_engine[i][id]); return -EIO; } } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index 36ed42736c52..83e3ad430922 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,6 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H +#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */ #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; @@ -17,7 +18,7 @@ struct igt_live_test { const char *name; unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; + unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES]; }; /* diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c index f81dc34c9c3e..c1bc8b00d938 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c @@ -203,7 +203,7 @@ void mtk_gamma_set(struct device *dev, struct drm_crtc_state *state) /* Disable RELAY mode to pass the processed image */ cfg_val &= ~GAMMA_RELAY_MODE; - cfg_val = readl(gamma->regs + DISP_GAMMA_CFG); + writel(cfg_val, gamma->regs + DISP_GAMMA_CFG); } void mtk_gamma_config(struct device *dev, unsigned int w, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index c277b9fae950..db43f9dff912 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -788,6 +788,7 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, crtc); struct mtk_crtc_state *mtk_crtc_state = to_mtk_crtc_state(crtc_state); struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + unsigned long flags; if (mtk_crtc->event && mtk_crtc_state->base.event) DRM_ERROR("new event while there is still a pending event\n"); @@ -795,7 +796,11 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, if (mtk_crtc_state->base.event) { mtk_crtc_state->base.event->pipe = drm_crtc_index(crtc); WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + spin_lock_irqsave(&crtc->dev->event_lock, flags); mtk_crtc->event = mtk_crtc_state->base.event; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + mtk_crtc_state->base.event = NULL; } } @@ -921,7 +926,14 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc) { - struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_drm_crtc *mtk_crtc = NULL; + + if (!crtc) + return NULL; + + mtk_crtc = to_mtk_crtc(crtc); + if (!mtk_crtc) + return NULL; return mtk_crtc->dma_dev; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2dfaa613276a..2b0c35cacbc6 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -443,6 +443,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) struct mtk_drm_private *private = drm->dev_private; struct mtk_drm_private *priv_n; struct device *dma_dev = NULL; + struct drm_crtc *crtc; int ret, i, j; if (drm_firmware_drivers_only()) @@ -519,7 +520,9 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* Use OVL device for all DMA memory allocations */ - dma_dev = mtk_drm_crtc_dma_dev_get(drm_crtc_from_index(drm, 0)); + crtc = drm_crtc_from_index(drm, 0); + if (crtc) + dma_dev = mtk_drm_crtc_dma_dev_get(crtc); if (!dma_dev) { ret = -ENODEV; dev_err(drm->dev, "Need at least one OVL device\n"); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c index e4279f1772a1..377d0e0cef84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c @@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc) /* Ensure an ior is hooked up to this outp already */ ior = outp->func->inherit(outp); - if (!ior) + if (!ior || !ior->arm.head) return -ENODEV; /* With iors, there will be a separate output path for each type of connector - and all of diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 1b811d6972a1..201022ae9214 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -49,14 +49,14 @@ #include <subdev/mmu.h> struct gk20a_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ u32 *vaddr; }; -#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) +#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory) /* * Used for objects allocated using the DMA API @@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) list_del(&obj->vaddr_node); vunmap(obj->base.vaddr); obj->base.vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); + imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory); nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, imem->vaddr_max); } @@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, { struct gk20a_instobj *node = gk20a_instobj(memory); struct nvkm_vmm_map map = { - .memory = &node->memory, + .memory = &node->base.memory, .offset = offset, .mem = node->mn, }; @@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, return -ENOMEM; *_node = &node->base; - nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; node->dma_addrs = (void *)(node->pages + npages); - nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, else ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT, align, &node); - *pmemory = node ? &node->memory : NULL; + *pmemory = node ? &node->base.memory : NULL; if (ret) return ret; diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c index 6e3670508e3a..30919c872ac8 100644 --- a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c +++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c @@ -326,7 +326,7 @@ static const struct drm_display_mode ltk050h3148w_mode = { static const struct ltk050h3146w_desc ltk050h3148w_data = { .mode = <k050h3148w_mode, .init = ltk050h3148w_init_sequence, - .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE, + .mode_flags = MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_VIDEO_BURST, }; static int ltk050h3146w_init_sequence(struct ltk050h3146w *ctx) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 138f154fecef..997c3a1adaca 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -325,28 +325,28 @@ struct joycon_imu_cal { * All the controller's button values are stored in a u32. * They can be accessed with bitwise ANDs. */ -static const u32 JC_BTN_Y = BIT(0); -static const u32 JC_BTN_X = BIT(1); -static const u32 JC_BTN_B = BIT(2); -static const u32 JC_BTN_A = BIT(3); -static const u32 JC_BTN_SR_R = BIT(4); -static const u32 JC_BTN_SL_R = BIT(5); -static const u32 JC_BTN_R = BIT(6); -static const u32 JC_BTN_ZR = BIT(7); -static const u32 JC_BTN_MINUS = BIT(8); -static const u32 JC_BTN_PLUS = BIT(9); -static const u32 JC_BTN_RSTICK = BIT(10); -static const u32 JC_BTN_LSTICK = BIT(11); -static const u32 JC_BTN_HOME = BIT(12); -static const u32 JC_BTN_CAP = BIT(13); /* capture button */ -static const u32 JC_BTN_DOWN = BIT(16); -static const u32 JC_BTN_UP = BIT(17); -static const u32 JC_BTN_RIGHT = BIT(18); -static const u32 JC_BTN_LEFT = BIT(19); -static const u32 JC_BTN_SR_L = BIT(20); -static const u32 JC_BTN_SL_L = BIT(21); -static const u32 JC_BTN_L = BIT(22); -static const u32 JC_BTN_ZL = BIT(23); +#define JC_BTN_Y BIT(0) +#define JC_BTN_X BIT(1) +#define JC_BTN_B BIT(2) +#define JC_BTN_A BIT(3) +#define JC_BTN_SR_R BIT(4) +#define JC_BTN_SL_R BIT(5) +#define JC_BTN_R BIT(6) +#define JC_BTN_ZR BIT(7) +#define JC_BTN_MINUS BIT(8) +#define JC_BTN_PLUS BIT(9) +#define JC_BTN_RSTICK BIT(10) +#define JC_BTN_LSTICK BIT(11) +#define JC_BTN_HOME BIT(12) +#define JC_BTN_CAP BIT(13) /* capture button */ +#define JC_BTN_DOWN BIT(16) +#define JC_BTN_UP BIT(17) +#define JC_BTN_RIGHT BIT(18) +#define JC_BTN_LEFT BIT(19) +#define JC_BTN_SR_L BIT(20) +#define JC_BTN_SL_L BIT(21) +#define JC_BTN_L BIT(22) +#define JC_BTN_ZL BIT(23) enum joycon_msg_type { JOYCON_MSG_TYPE_NONE, @@ -927,14 +927,27 @@ static int joycon_request_calibration(struct joycon_ctlr *ctlr) */ static void joycon_calc_imu_cal_divisors(struct joycon_ctlr *ctlr) { - int i; + int i, divz = 0; for (i = 0; i < 3; i++) { ctlr->imu_cal_accel_divisor[i] = ctlr->accel_cal.scale[i] - ctlr->accel_cal.offset[i]; ctlr->imu_cal_gyro_divisor[i] = ctlr->gyro_cal.scale[i] - ctlr->gyro_cal.offset[i]; + + if (ctlr->imu_cal_accel_divisor[i] == 0) { + ctlr->imu_cal_accel_divisor[i] = 1; + divz++; + } + + if (ctlr->imu_cal_gyro_divisor[i] == 0) { + ctlr->imu_cal_gyro_divisor[i] = 1; + divz++; + } } + + if (divz) + hid_warn(ctlr->hdev, "inaccurate IMU divisors (%d)\n", divz); } static const s16 DFLT_ACCEL_OFFSET /*= 0*/; @@ -1163,16 +1176,16 @@ static void joycon_parse_imu_report(struct joycon_ctlr *ctlr, JC_IMU_SAMPLES_PER_DELTA_AVG) { ctlr->imu_avg_delta_ms = ctlr->imu_delta_samples_sum / ctlr->imu_delta_samples_count; - /* don't ever want divide by zero shenanigans */ - if (ctlr->imu_avg_delta_ms == 0) { - ctlr->imu_avg_delta_ms = 1; - hid_warn(ctlr->hdev, - "calculated avg imu delta of 0\n"); - } ctlr->imu_delta_samples_count = 0; ctlr->imu_delta_samples_sum = 0; } + /* don't ever want divide by zero shenanigans */ + if (ctlr->imu_avg_delta_ms == 0) { + ctlr->imu_avg_delta_ms = 1; + hid_warn(ctlr->hdev, "calculated avg imu delta of 0\n"); + } + /* useful for debugging IMU sample rate */ hid_dbg(ctlr->hdev, "imu_report: ms=%u last_ms=%u delta=%u avg_delta=%u\n", diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 2a8b081bce7d..3ff87cb4dc49 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -660,6 +660,7 @@ config DM_ZONED config DM_AUDIT bool "DM audit events" + depends on BLK_DEV_DM depends on AUDIT help Generate audit events for device-mapper. diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e85c688fd91e..c5f03aab4552 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1755,11 +1755,12 @@ static void integrity_metadata(struct work_struct *w) sectors_to_process = dio->range.n_sectors; __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + struct bio_vec bv_copy = bv; unsigned int pos; char *mem, *checksums_ptr; again: - mem = bvec_kmap_local(&bv); + mem = bvec_kmap_local(&bv_copy); pos = 0; checksums_ptr = checksums; do { @@ -1768,7 +1769,7 @@ again: sectors_to_process -= ic->sectors_per_block; pos += ic->sectors_per_block << SECTOR_SHIFT; sector += ic->sectors_per_block; - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); + } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack); kunmap_local(mem); r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, @@ -1793,9 +1794,9 @@ again: if (!sectors_to_process) break; - if (unlikely(pos < bv.bv_len)) { - bv.bv_offset += pos; - bv.bv_len -= pos; + if (unlikely(pos < bv_copy.bv_len)) { + bv_copy.bv_offset += pos; + bv_copy.bv_len -= pos; goto again; } } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 91ebdcc6e9a8..eb009d6bb03a 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3317,6 +3317,9 @@ static void raid_dtr(struct dm_target *ti) mddev_lock_nointr(&rs->md); md_stop(&rs->md); mddev_unlock(&rs->md); + + if (work_pending(&rs->md.event_work)) + flush_work(&rs->md.event_work); raid_set_free(rs); } diff --git a/drivers/md/md.c b/drivers/md/md.c index b066abbffd10..9bdd57324c37 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -82,6 +82,14 @@ static struct module *md_cluster_mod; static DECLARE_WAIT_QUEUE_HEAD(resync_wait); static struct workqueue_struct *md_wq; + +/* + * This workqueue is used for sync_work to register new sync_thread, and for + * del_work to remove rdev, and for event_work that is only set by dm-raid. + * + * Noted that sync_work will grab reconfig_mutex, hence never flush this + * workqueue whith reconfig_mutex grabbed. + */ static struct workqueue_struct *md_misc_wq; struct workqueue_struct *md_bitmap_wq; @@ -6330,9 +6338,6 @@ static void __md_stop(struct mddev *mddev) struct md_personality *pers = mddev->pers; md_bitmap_destroy(mddev); mddev_detach(mddev); - /* Ensure ->event_work is done */ - if (mddev->event_work.func) - flush_workqueue(md_misc_wq); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 6d07693c6b9f..fe17c7f98e81 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_trace_printk: - if (bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (perfmon_capable()) return bpf_get_trace_printk_proto(); fallthrough; default: diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile index f1f752a8f7bb..6ab615365172 100644 --- a/drivers/net/ethernet/amazon/ena/Makefile +++ b/drivers/net/ethernet/amazon/ena/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_ENA_ETHERNET) += ena.o -ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o +ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o ena_xdp.o diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 2d8c5a2841b8..0cb6cc1cef56 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -7,6 +7,7 @@ #include <linux/pci.h> #include "ena_netdev.h" +#include "ena_xdp.h" struct ena_stats { char name[ETH_GSTRING_LEN]; @@ -262,17 +263,14 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) ena_stats->name); } - if (!is_xdp) { - /* RX stats, in XDP there isn't a RX queue - * counterpart - */ - for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { - ena_stats = &ena_stats_rx_strings[j]; + /* In XDP there isn't an RX queue counterpart */ + if (is_xdp) + continue; - ethtool_sprintf(data, - "queue_%u_rx_%s", i, - ena_stats->name); - } + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + ethtool_sprintf(data, "queue_%u_rx_%s", i, ena_stats->name); } } } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index afd1b7ce0013..1c0a7828d397 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -19,8 +19,8 @@ #include <net/ip.h> #include "ena_netdev.h" -#include <linux/bpf_trace.h> #include "ena_pci_id_tbl.h" +#include "ena_xdp.h" MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); MODULE_DESCRIPTION(DEVICE_NAME); @@ -45,53 +45,6 @@ static void check_for_admin_com_state(struct ena_adapter *adapter); static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); -static void ena_init_io_rings(struct ena_adapter *adapter, - int first_index, int count); -static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, - int count); -static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, - int count); -static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); -static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, - int count); -static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); -static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); -static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); -static void ena_destroy_all_tx_queues(struct ena_adapter *adapter); -static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); -static void ena_napi_disable_in_range(struct ena_adapter *adapter, - int first_index, int count); -static void ena_napi_enable_in_range(struct ena_adapter *adapter, - int first_index, int count); -static int ena_up(struct ena_adapter *adapter); -static void ena_down(struct ena_adapter *adapter); -static void ena_unmask_interrupt(struct ena_ring *tx_ring, - struct ena_ring *rx_ring); -static void ena_update_ring_numa_node(struct ena_ring *tx_ring, - struct ena_ring *rx_ring); -static void ena_unmap_tx_buff(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info); -static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, - int first_index, int count); -static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, int count); - -/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ -static void ena_increase_stat(u64 *statp, u64 cnt, - struct u64_stats_sync *syncp) -{ - u64_stats_update_begin(syncp); - (*statp) += cnt; - u64_stats_update_end(syncp); -} - -static void ena_ring_tx_doorbell(struct ena_ring *tx_ring) -{ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp); -} - static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@ -135,19 +88,18 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) return ret; } -static int ena_xmit_common(struct net_device *dev, - struct ena_ring *ring, - struct ena_tx_buffer *tx_info, - struct ena_com_tx_ctx *ena_tx_ctx, - u16 next_to_use, - u32 bytes) +int ena_xmit_common(struct ena_adapter *adapter, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes) { - struct ena_adapter *adapter = netdev_priv(dev); int rc, nb_hw_desc; if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq, ena_tx_ctx))) { - netif_dbg(adapter, tx_queued, dev, + netif_dbg(adapter, tx_queued, adapter->netdev, "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", ring->qid); ena_ring_tx_doorbell(ring); @@ -162,7 +114,7 @@ static int ena_xmit_common(struct net_device *dev, * ena_com_prepare_tx() are fatal and therefore require a device reset. */ if (unlikely(rc)) { - netif_err(adapter, tx_queued, dev, + netif_err(adapter, tx_queued, adapter->netdev, "Failed to prepare tx bufs\n"); ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp); @@ -178,6 +130,7 @@ static int ena_xmit_common(struct net_device *dev, u64_stats_update_end(&ring->syncp); tx_info->tx_descs = nb_hw_desc; + tx_info->total_tx_size = bytes; tx_info->last_jiffies = jiffies; tx_info->print_once = 0; @@ -186,467 +139,6 @@ static int ena_xmit_common(struct net_device *dev, return 0; } -/* This is the XDP napi callback. XDP queues use a separate napi callback - * than Rx/Tx queues. - */ -static int ena_xdp_io_poll(struct napi_struct *napi, int budget) -{ - struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); - u32 xdp_work_done, xdp_budget; - struct ena_ring *xdp_ring; - int napi_comp_call = 0; - int ret; - - xdp_ring = ena_napi->xdp_ring; - - xdp_budget = budget; - - if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || - test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { - napi_complete_done(napi, 0); - return 0; - } - - xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); - - /* If the device is about to reset or down, avoid unmask - * the interrupt and return 0 so NAPI won't reschedule - */ - if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { - napi_complete_done(napi, 0); - ret = 0; - } else if (xdp_budget > xdp_work_done) { - napi_comp_call = 1; - if (napi_complete_done(napi, xdp_work_done)) - ena_unmask_interrupt(xdp_ring, NULL); - ena_update_ring_numa_node(xdp_ring, NULL); - ret = xdp_work_done; - } else { - ret = xdp_budget; - } - - u64_stats_update_begin(&xdp_ring->syncp); - xdp_ring->tx_stats.napi_comp += napi_comp_call; - xdp_ring->tx_stats.tx_poll++; - u64_stats_update_end(&xdp_ring->syncp); - xdp_ring->tx_stats.last_napi_jiffies = jiffies; - - return ret; -} - -static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, - struct ena_tx_buffer *tx_info, - struct xdp_frame *xdpf, - struct ena_com_tx_ctx *ena_tx_ctx) -{ - struct ena_adapter *adapter = xdp_ring->adapter; - struct ena_com_buf *ena_buf; - int push_len = 0; - dma_addr_t dma; - void *data; - u32 size; - - tx_info->xdpf = xdpf; - data = tx_info->xdpf->data; - size = tx_info->xdpf->len; - - if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - /* Designate part of the packet for LLQ */ - push_len = min_t(u32, size, xdp_ring->tx_max_header_size); - - ena_tx_ctx->push_header = data; - - size -= push_len; - data += push_len; - } - - ena_tx_ctx->header_len = push_len; - - if (size > 0) { - dma = dma_map_single(xdp_ring->dev, - data, - size, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) - goto error_report_dma_error; - - tx_info->map_linear_data = 0; - - ena_buf = tx_info->bufs; - ena_buf->paddr = dma; - ena_buf->len = size; - - ena_tx_ctx->ena_bufs = ena_buf; - ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; - } - - return 0; - -error_report_dma_error: - ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1, - &xdp_ring->syncp); - netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); - - return -EINVAL; -} - -static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, - struct net_device *dev, - struct xdp_frame *xdpf, - int flags) -{ - struct ena_com_tx_ctx ena_tx_ctx = {}; - struct ena_tx_buffer *tx_info; - u16 next_to_use, req_id; - int rc; - - next_to_use = xdp_ring->next_to_use; - req_id = xdp_ring->free_ids[next_to_use]; - tx_info = &xdp_ring->tx_buffer_info[req_id]; - tx_info->num_of_bufs = 0; - - rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); - if (unlikely(rc)) - return rc; - - ena_tx_ctx.req_id = req_id; - - rc = ena_xmit_common(dev, - xdp_ring, - tx_info, - &ena_tx_ctx, - next_to_use, - xdpf->len); - if (rc) - goto error_unmap_dma; - - /* trigger the dma engine. ena_ring_tx_doorbell() - * calls a memory barrier inside it. - */ - if (flags & XDP_XMIT_FLUSH) - ena_ring_tx_doorbell(xdp_ring); - - return rc; - -error_unmap_dma: - ena_unmap_tx_buff(xdp_ring, tx_info); - tx_info->xdpf = NULL; - return rc; -} - -static int ena_xdp_xmit(struct net_device *dev, int n, - struct xdp_frame **frames, u32 flags) -{ - struct ena_adapter *adapter = netdev_priv(dev); - struct ena_ring *xdp_ring; - int qid, i, nxmit = 0; - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) - return -EINVAL; - - if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) - return -ENETDOWN; - - /* We assume that all rings have the same XDP program */ - if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog)) - return -ENXIO; - - qid = smp_processor_id() % adapter->xdp_num_queues; - qid += adapter->xdp_first_ring; - xdp_ring = &adapter->tx_ring[qid]; - - /* Other CPU ids might try to send thorugh this queue */ - spin_lock(&xdp_ring->xdp_tx_lock); - - for (i = 0; i < n; i++) { - if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0)) - break; - nxmit++; - } - - /* Ring doorbell to make device aware of the packets */ - if (flags & XDP_XMIT_FLUSH) - ena_ring_tx_doorbell(xdp_ring); - - spin_unlock(&xdp_ring->xdp_tx_lock); - - /* Return number of packets sent */ - return nxmit; -} - -static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) -{ - u32 verdict = ENA_XDP_PASS; - struct bpf_prog *xdp_prog; - struct ena_ring *xdp_ring; - struct xdp_frame *xdpf; - u64 *xdp_stat; - - xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); - - if (!xdp_prog) - goto out; - - verdict = bpf_prog_run_xdp(xdp_prog, xdp); - - switch (verdict) { - case XDP_TX: - xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) { - trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = ENA_XDP_DROP; - break; - } - - /* Find xmit queue */ - xdp_ring = rx_ring->xdp_ring; - - /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ - spin_lock(&xdp_ring->xdp_tx_lock); - - if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, - XDP_XMIT_FLUSH)) - xdp_return_frame(xdpf); - - spin_unlock(&xdp_ring->xdp_tx_lock); - xdp_stat = &rx_ring->rx_stats.xdp_tx; - verdict = ENA_XDP_TX; - break; - case XDP_REDIRECT: - if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { - xdp_stat = &rx_ring->rx_stats.xdp_redirect; - verdict = ENA_XDP_REDIRECT; - break; - } - trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = ENA_XDP_DROP; - break; - case XDP_ABORTED: - trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = ENA_XDP_DROP; - break; - case XDP_DROP: - xdp_stat = &rx_ring->rx_stats.xdp_drop; - verdict = ENA_XDP_DROP; - break; - case XDP_PASS: - xdp_stat = &rx_ring->rx_stats.xdp_pass; - verdict = ENA_XDP_PASS; - break; - default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_invalid; - verdict = ENA_XDP_DROP; - } - - ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); -out: - return verdict; -} - -static void ena_init_all_xdp_queues(struct ena_adapter *adapter) -{ - adapter->xdp_first_ring = adapter->num_io_queues; - adapter->xdp_num_queues = adapter->num_io_queues; - - ena_init_io_rings(adapter, - adapter->xdp_first_ring, - adapter->xdp_num_queues); -} - -static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) -{ - u32 xdp_first_ring = adapter->xdp_first_ring; - u32 xdp_num_queues = adapter->xdp_num_queues; - int rc = 0; - - rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); - if (rc) - goto setup_err; - - rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); - if (rc) - goto create_err; - - return 0; - -create_err: - ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); -setup_err: - return rc; -} - -/* Provides a way for both kernel and bpf-prog to know - * more about the RX-queue a given XDP frame arrived on. - */ -static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) -{ - int rc; - - rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0); - - if (rc) { - netif_err(rx_ring->adapter, ifup, rx_ring->netdev, - "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", - rx_ring->qid, rc); - goto err; - } - - rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, - NULL); - - if (rc) { - netif_err(rx_ring->adapter, ifup, rx_ring->netdev, - "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", - rx_ring->qid, rc); - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - } - -err: - return rc; -} - -static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) -{ - xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); -} - -static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, - struct bpf_prog *prog, - int first, int count) -{ - struct bpf_prog *old_bpf_prog; - struct ena_ring *rx_ring; - int i = 0; - - for (i = first; i < count; i++) { - rx_ring = &adapter->rx_ring[i]; - old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); - - if (!old_bpf_prog && prog) { - ena_xdp_register_rxq_info(rx_ring); - rx_ring->rx_headroom = XDP_PACKET_HEADROOM; - } else if (old_bpf_prog && !prog) { - ena_xdp_unregister_rxq_info(rx_ring); - rx_ring->rx_headroom = NET_SKB_PAD; - } - } -} - -static void ena_xdp_exchange_program(struct ena_adapter *adapter, - struct bpf_prog *prog) -{ - struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); - - ena_xdp_exchange_program_rx_in_range(adapter, - prog, - 0, - adapter->num_io_queues); - - if (old_bpf_prog) - bpf_prog_put(old_bpf_prog); -} - -static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) -{ - bool was_up; - int rc; - - was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); - - if (was_up) - ena_down(adapter); - - adapter->xdp_first_ring = 0; - adapter->xdp_num_queues = 0; - ena_xdp_exchange_program(adapter, NULL); - if (was_up) { - rc = ena_up(adapter); - if (rc) - return rc; - } - return 0; -} - -static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) -{ - struct ena_adapter *adapter = netdev_priv(netdev); - struct bpf_prog *prog = bpf->prog; - struct bpf_prog *old_bpf_prog; - int rc, prev_mtu; - bool is_up; - - is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); - rc = ena_xdp_allowed(adapter); - if (rc == ENA_XDP_ALLOWED) { - old_bpf_prog = adapter->xdp_bpf_prog; - if (prog) { - if (!is_up) { - ena_init_all_xdp_queues(adapter); - } else if (!old_bpf_prog) { - ena_down(adapter); - ena_init_all_xdp_queues(adapter); - } - ena_xdp_exchange_program(adapter, prog); - - if (is_up && !old_bpf_prog) { - rc = ena_up(adapter); - if (rc) - return rc; - } - xdp_features_set_redirect_target(netdev, false); - } else if (old_bpf_prog) { - xdp_features_clear_redirect_target(netdev); - rc = ena_destroy_and_free_all_xdp_queues(adapter); - if (rc) - return rc; - } - - prev_mtu = netdev->max_mtu; - netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; - - if (!old_bpf_prog) - netif_info(adapter, drv, adapter->netdev, - "XDP program is set, changing the max_mtu from %d to %d", - prev_mtu, netdev->max_mtu); - - } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { - netif_err(adapter, drv, adapter->netdev, - "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", - netdev->mtu, ENA_XDP_MAX_MTU); - NL_SET_ERR_MSG_MOD(bpf->extack, - "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); - return -EINVAL; - } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { - netif_err(adapter, drv, adapter->netdev, - "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", - adapter->num_io_queues, adapter->max_num_io_queues); - NL_SET_ERR_MSG_MOD(bpf->extack, - "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); - return -EINVAL; - } - - return 0; -} - -/* This is the main xdp callback, it's used by the kernel to set/unset the xdp - * program as well as to query the current xdp program id. - */ -static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) -{ - switch (bpf->command) { - case XDP_SETUP_PROG: - return ena_xdp_set(netdev, bpf); - default: - return -EINVAL; - } - return 0; -} - static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL @@ -688,8 +180,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, u64_stats_init(&ring->syncp); } -static void ena_init_io_rings(struct ena_adapter *adapter, - int first_index, int count) +void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; @@ -820,9 +312,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->push_buf_intermediate_buf = NULL; } -static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, - int count) +int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) { int i, rc = 0; @@ -845,8 +336,8 @@ err_setup_tx: return rc; } -static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, int count) +void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) { int i; @@ -859,7 +350,7 @@ static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, * * Free all transmit software resources */ -static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) +void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { ena_free_all_io_tx_resources_in_range(adapter, 0, @@ -1169,8 +660,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static void ena_unmap_tx_buff(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info) +void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; u32 cnt; @@ -1262,6 +753,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) for (i = 0; i < adapter->num_io_queues; i++) { ena_qid = ENA_IO_RXQ_IDX(i); cancel_work_sync(&adapter->ena_napi[i].dim.work); + ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } } @@ -1272,8 +764,8 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } -static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, - struct ena_tx_buffer *tx_info, bool is_xdp) +int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp) { if (tx_info) netif_err(ring->adapter, @@ -1305,17 +797,6 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) return handle_invalid_req_id(tx_ring, req_id, tx_info, false); } -static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) -{ - struct ena_tx_buffer *tx_info; - - tx_info = &xdp_ring->tx_buffer_info[req_id]; - if (likely(tx_info->xdpf)) - return 0; - - return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); -} - static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) { struct netdev_queue *txq; @@ -1363,7 +844,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) "tx_poll: q %d skb %p completed\n", tx_ring->qid, skb); - tx_bytes += skb->len; + tx_bytes += tx_info->total_tx_size; dev_kfree_skb(skb); tx_pkts++; total_done += tx_info->tx_descs; @@ -1688,6 +1169,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u return ret; } + /* ena_clean_rx_irq - Cleanup RX irq * @rx_ring: RX ring to clean * @napi: napi handler @@ -1880,8 +1362,8 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) rx_ring->per_napi_packets = 0; } -static void ena_unmask_interrupt(struct ena_ring *tx_ring, - struct ena_ring *rx_ring) +void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) { u32 rx_interval = tx_ring->smoothed_interval; struct ena_eth_io_intr_reg intr_reg; @@ -1913,8 +1395,8 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); } -static void ena_update_ring_numa_node(struct ena_ring *tx_ring, - struct ena_ring *rx_ring) +void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) { int cpu = get_cpu(); int numa_node; @@ -1949,67 +1431,6 @@ out: put_cpu(); } -static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) -{ - u32 total_done = 0; - u16 next_to_clean; - int tx_pkts = 0; - u16 req_id; - int rc; - - if (unlikely(!xdp_ring)) - return 0; - next_to_clean = xdp_ring->next_to_clean; - - while (tx_pkts < budget) { - struct ena_tx_buffer *tx_info; - struct xdp_frame *xdpf; - - rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, - &req_id); - if (rc) { - if (unlikely(rc == -EINVAL)) - handle_invalid_req_id(xdp_ring, req_id, NULL, - true); - break; - } - - /* validate that the request id points to a valid xdp_frame */ - rc = validate_xdp_req_id(xdp_ring, req_id); - if (rc) - break; - - tx_info = &xdp_ring->tx_buffer_info[req_id]; - xdpf = tx_info->xdpf; - - tx_info->xdpf = NULL; - tx_info->last_jiffies = 0; - ena_unmap_tx_buff(xdp_ring, tx_info); - - netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, - "tx_poll: q %d skb %p completed\n", xdp_ring->qid, - xdpf); - - tx_pkts++; - total_done += tx_info->tx_descs; - - xdp_return_frame(xdpf); - xdp_ring->free_ids[next_to_clean] = req_id; - next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, - xdp_ring->ring_size); - } - - xdp_ring->next_to_clean = next_to_clean; - ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); - ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); - - netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, - "tx_poll: q %d done. total pkts: %d\n", - xdp_ring->qid, tx_pkts); - - return tx_pkts; -} - static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); @@ -2326,28 +1747,36 @@ static void ena_del_napi_in_range(struct ena_adapter *adapter, for (i = first_index; i < first_index + count; i++) { netif_napi_del(&adapter->ena_napi[i].napi); - WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) && - adapter->ena_napi[i].xdp_ring); + WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].rx_ring); } } static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, int count) { + int (*napi_handler)(struct napi_struct *napi, int budget); int i; for (i = first_index; i < first_index + count; i++) { struct ena_napi *napi = &adapter->ena_napi[i]; + struct ena_ring *rx_ring, *tx_ring; - netif_napi_add(adapter->netdev, &napi->napi, - ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll); + memset(napi, 0, sizeof(*napi)); - if (!ENA_IS_XDP_INDEX(adapter, i)) { - napi->rx_ring = &adapter->rx_ring[i]; - napi->tx_ring = &adapter->tx_ring[i]; - } else { - napi->xdp_ring = &adapter->tx_ring[i]; - } + rx_ring = &adapter->rx_ring[i]; + tx_ring = &adapter->tx_ring[i]; + + napi_handler = ena_io_poll; + if (ENA_IS_XDP_INDEX(adapter, i)) + napi_handler = ena_xdp_io_poll; + + netif_napi_add(adapter->netdev, &napi->napi, napi_handler); + + if (!ENA_IS_XDP_INDEX(adapter, i)) + napi->rx_ring = rx_ring; + + napi->tx_ring = tx_ring; napi->qid = i; } } @@ -2475,8 +1904,8 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) return rc; } -static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, - int first_index, int count) +int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; @@ -2556,12 +1985,15 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) if (rc) goto create_err; INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work); + + ena_xdp_register_rxq_info(&adapter->rx_ring[i]); } return 0; create_err: while (i--) { + ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]); cancel_work_sync(&adapter->ena_napi[i].dim.work); ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); } @@ -2686,7 +2118,7 @@ err_setup_tx: } } -static int ena_up(struct ena_adapter *adapter) +int ena_up(struct ena_adapter *adapter) { int io_queue_count, rc, i; @@ -2748,7 +2180,7 @@ err_req_irq: return rc; } -static void ena_down(struct ena_adapter *adapter) +void ena_down(struct ena_adapter *adapter) { int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; @@ -3179,7 +2611,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching); - rc = ena_xmit_common(dev, + rc = ena_xmit_common(adapter, tx_ring, tx_info, &ena_tx_ctx, @@ -3363,6 +2795,7 @@ static void ena_get_stats64(struct net_device *netdev, { struct ena_adapter *adapter = netdev_priv(netdev); struct ena_ring *rx_ring, *tx_ring; + u64 total_xdp_rx_drops = 0; unsigned int start; u64 rx_drops; u64 tx_drops; @@ -3371,8 +2804,8 @@ static void ena_get_stats64(struct net_device *netdev, if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) return; - for (i = 0; i < adapter->num_io_queues; i++) { - u64 bytes, packets; + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { + u64 bytes, packets, xdp_rx_drops; tx_ring = &adapter->tx_ring[i]; @@ -3385,16 +2818,22 @@ static void ena_get_stats64(struct net_device *netdev, stats->tx_packets += packets; stats->tx_bytes += bytes; + /* In XDP there isn't an RX queue counterpart */ + if (ENA_IS_XDP_INDEX(adapter, i)) + continue; + rx_ring = &adapter->rx_ring[i]; do { start = u64_stats_fetch_begin(&rx_ring->syncp); packets = rx_ring->rx_stats.cnt; bytes = rx_ring->rx_stats.bytes; + xdp_rx_drops = rx_ring->rx_stats.xdp_drop; } while (u64_stats_fetch_retry(&rx_ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; + total_xdp_rx_drops += xdp_rx_drops; } do { @@ -3403,7 +2842,7 @@ static void ena_get_stats64(struct net_device *netdev, tx_drops = adapter->dev_stats.tx_drops; } while (u64_stats_fetch_retry(&adapter->syncp, start)); - stats->rx_dropped = rx_drops; + stats->rx_dropped = rx_drops + total_xdp_rx_drops; stats->tx_dropped = tx_drops; stats->multicast = 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 33c923e1261a..6d2cc20210cc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -110,19 +110,6 @@ #define ENA_MMIO_DISABLE_REG_READ BIT(0) -/* The max MTU size is configured to be the ethernet frame size without - * the overhead of the ethernet header, which can have a VLAN header, and - * a frame check sequence (FCS). - * The buffer size we share with the device is defined to be ENA_PAGE_SIZE - */ - -#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ - VLAN_HLEN - XDP_PACKET_HEADROOM - \ - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) - -#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ - ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) - struct ena_irq { irq_handler_t handler; void *data; @@ -138,13 +125,18 @@ struct ena_napi { struct napi_struct napi; struct ena_ring *tx_ring; struct ena_ring *rx_ring; - struct ena_ring *xdp_ring; u32 qid; struct dim dim; }; struct ena_tx_buffer { - struct sk_buff *skb; + union { + struct sk_buff *skb; + /* XDP buffer structure which is used for sending packets in + * the xdp queues + */ + struct xdp_frame *xdpf; + }; /* num of ena desc for this specific skb * (includes data desc and metadata desc) */ @@ -152,16 +144,14 @@ struct ena_tx_buffer { /* num of buffers used by this skb */ u32 num_of_bufs; - /* XDP buffer structure which is used for sending packets in - * the xdp queues - */ - struct xdp_frame *xdpf; + /* Total size of all buffers in bytes */ + u32 total_tx_size; /* Indicate if bufs[0] map the linear data of the skb. */ u8 map_linear_data; /* Used for detect missing tx packets to limit the number of prints */ - u32 print_once; + u8 print_once; /* Save the last jiffies to detect missing tx packets * * sets to non zero value on ena_start_xmit and set to zero on @@ -421,47 +411,44 @@ static inline void ena_reset_device(struct ena_adapter *adapter, set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); } -enum ena_xdp_errors_t { - ENA_XDP_ALLOWED = 0, - ENA_XDP_CURRENT_MTU_TOO_LARGE, - ENA_XDP_NO_ENOUGH_QUEUES, -}; +int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp); -enum ENA_XDP_ACTIONS { - ENA_XDP_PASS = 0, - ENA_XDP_TX = BIT(0), - ENA_XDP_REDIRECT = BIT(1), - ENA_XDP_DROP = BIT(2) -}; - -#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) - -static inline bool ena_xdp_present(struct ena_adapter *adapter) -{ - return !!adapter->xdp_bpf_prog; -} - -static inline bool ena_xdp_present_ring(struct ena_ring *ring) +/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ +static inline void ena_increase_stat(u64 *statp, u64 cnt, + struct u64_stats_sync *syncp) { - return !!ring->xdp_bpf_prog; + u64_stats_update_begin(syncp); + (*statp) += cnt; + u64_stats_update_end(syncp); } -static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter, - u32 queues) +static inline void ena_ring_tx_doorbell(struct ena_ring *tx_ring) { - return 2 * queues <= adapter->max_num_io_queues; -} - -static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) -{ - enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; - - if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) - rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; - else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) - rc = ENA_XDP_NO_ENOUGH_QUEUES; - - return rc; + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp); } +int ena_xmit_common(struct ena_adapter *adapter, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes); +void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); +void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count); +int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); +int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count); +void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count); +void ena_free_all_io_tx_resources(struct ena_adapter *adapter); +void ena_down(struct ena_adapter *adapter); +int ena_up(struct ena_adapter *adapter); +void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring); +void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c new file mode 100644 index 000000000000..fc1c4ef73ba3 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c @@ -0,0 +1,468 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include "ena_xdp.h" + +static int validate_xdp_req_id(struct ena_ring *tx_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info; + + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; + + return handle_invalid_req_id(tx_ring, req_id, tx_info, true); +} + +static int ena_xdp_tx_map_frame(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info, + struct xdp_frame *xdpf, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_adapter *adapter = tx_ring->adapter; + struct ena_com_buf *ena_buf; + int push_len = 0; + dma_addr_t dma; + void *data; + u32 size; + + tx_info->xdpf = xdpf; + data = tx_info->xdpf->data; + size = tx_info->xdpf->len; + + if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* Designate part of the packet for LLQ */ + push_len = min_t(u32, size, tx_ring->tx_max_header_size); + + ena_tx_ctx->push_header = data; + + size -= push_len; + data += push_len; + } + + ena_tx_ctx->header_len = push_len; + + if (size > 0) { + dma = dma_map_single(tx_ring->dev, + data, + size, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) + goto error_report_dma_error; + + tx_info->map_linear_data = 0; + + ena_buf = tx_info->bufs; + ena_buf->paddr = dma; + ena_buf->len = size; + + ena_tx_ctx->ena_bufs = ena_buf; + ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; + } + + return 0; + +error_report_dma_error: + ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1, + &tx_ring->syncp); + netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); + + return -EINVAL; +} + +int ena_xdp_xmit_frame(struct ena_ring *tx_ring, + struct ena_adapter *adapter, + struct xdp_frame *xdpf, + int flags) +{ + struct ena_com_tx_ctx ena_tx_ctx = {}; + struct ena_tx_buffer *tx_info; + u16 next_to_use, req_id; + int rc; + + next_to_use = tx_ring->next_to_use; + req_id = tx_ring->free_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx); + if (unlikely(rc)) + return rc; + + ena_tx_ctx.req_id = req_id; + + rc = ena_xmit_common(adapter, + tx_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + xdpf->len); + if (rc) + goto error_unmap_dma; + + /* trigger the dma engine. ena_ring_tx_doorbell() + * calls a memory barrier inside it. + */ + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(tx_ring); + + return rc; + +error_unmap_dma: + ena_unmap_tx_buff(tx_ring, tx_info); + tx_info->xdpf = NULL; + return rc; +} + +int ena_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_ring *tx_ring; + int qid, i, nxmit = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return -ENETDOWN; + + /* We assume that all rings have the same XDP program */ + if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog)) + return -ENXIO; + + qid = smp_processor_id() % adapter->xdp_num_queues; + qid += adapter->xdp_first_ring; + tx_ring = &adapter->tx_ring[qid]; + + /* Other CPU ids might try to send thorugh this queue */ + spin_lock(&tx_ring->xdp_tx_lock); + + for (i = 0; i < n; i++) { + if (ena_xdp_xmit_frame(tx_ring, adapter, frames[i], 0)) + break; + nxmit++; + } + + /* Ring doorbell to make device aware of the packets */ + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(tx_ring); + + spin_unlock(&tx_ring->xdp_tx_lock); + + /* Return number of packets sent */ + return nxmit; +} + +static void ena_init_all_xdp_queues(struct ena_adapter *adapter) +{ + adapter->xdp_first_ring = adapter->num_io_queues; + adapter->xdp_num_queues = adapter->num_io_queues; + + ena_init_io_rings(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); +} + +int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) +{ + u32 xdp_first_ring = adapter->xdp_first_ring; + u32 xdp_num_queues = adapter->xdp_num_queues; + int rc = 0; + + rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); + if (rc) + goto setup_err; + + rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); + if (rc) + goto create_err; + + return 0; + +create_err: + ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); +setup_err: + return rc; +} + +/* Provides a way for both kernel and bpf-prog to know + * more about the RX-queue a given XDP frame arrived on. + */ +int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) +{ + int rc; + + rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0); + + netif_dbg(rx_ring->adapter, ifup, rx_ring->netdev, "Registering RX info for queue %d", + rx_ring->qid); + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + goto err; + } + + rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + +err: + return rc; +} + +void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) +{ + netif_dbg(rx_ring->adapter, ifdown, rx_ring->netdev, + "Unregistering RX info for queue %d", + rx_ring->qid); + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); +} + +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, int count) +{ + struct bpf_prog *old_bpf_prog; + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; + old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); + + if (!old_bpf_prog && prog) { + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; + } else if (old_bpf_prog && !prog) { + rx_ring->rx_headroom = NET_SKB_PAD; + } + } +} + +static void ena_xdp_exchange_program(struct ena_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + ena_xdp_exchange_program_rx_in_range(adapter, + prog, + 0, + adapter->num_io_queues); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); +} + +static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) +{ + bool was_up; + int rc; + + was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + if (was_up) + ena_down(adapter); + + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + ena_xdp_exchange_program(adapter, NULL); + if (was_up) { + rc = ena_up(adapter); + if (rc) + return rc; + } + return 0; +} + +static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *prog = bpf->prog; + struct bpf_prog *old_bpf_prog; + int rc, prev_mtu; + bool is_up; + + is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + rc = ena_xdp_allowed(adapter); + if (rc == ENA_XDP_ALLOWED) { + old_bpf_prog = adapter->xdp_bpf_prog; + if (prog) { + if (!is_up) { + ena_init_all_xdp_queues(adapter); + } else if (!old_bpf_prog) { + ena_down(adapter); + ena_init_all_xdp_queues(adapter); + } + ena_xdp_exchange_program(adapter, prog); + + netif_dbg(adapter, drv, adapter->netdev, "Set a new XDP program\n"); + + if (is_up && !old_bpf_prog) { + rc = ena_up(adapter); + if (rc) + return rc; + } + xdp_features_set_redirect_target(netdev, false); + } else if (old_bpf_prog) { + xdp_features_clear_redirect_target(netdev); + netif_dbg(adapter, drv, adapter->netdev, "Removing XDP program\n"); + + rc = ena_destroy_and_free_all_xdp_queues(adapter); + if (rc) + return rc; + } + + prev_mtu = netdev->max_mtu; + netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; + + if (!old_bpf_prog) + netif_info(adapter, drv, adapter->netdev, + "XDP program is set, changing the max_mtu from %d to %d", + prev_mtu, netdev->max_mtu); + + } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); + return -EINVAL; + } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", + adapter->num_io_queues, adapter->max_num_io_queues); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); + return -EINVAL; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. + */ +int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf); + default: + return -EINVAL; + } + return 0; +} + +static int ena_clean_xdp_irq(struct ena_ring *tx_ring, u32 budget) +{ + u32 total_done = 0; + u16 next_to_clean; + int tx_pkts = 0; + u16 req_id; + int rc; + + if (unlikely(!tx_ring)) + return 0; + next_to_clean = tx_ring->next_to_clean; + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct xdp_frame *xdpf; + + rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, + &req_id); + if (rc) { + if (unlikely(rc == -EINVAL)) + handle_invalid_req_id(tx_ring, req_id, NULL, true); + break; + } + + /* validate that the request id points to a valid xdp_frame */ + rc = validate_xdp_req_id(tx_ring, req_id); + if (rc) + break; + + tx_info = &tx_ring->tx_buffer_info[req_id]; + + tx_info->last_jiffies = 0; + + xdpf = tx_info->xdpf; + tx_info->xdpf = NULL; + ena_unmap_tx_buff(tx_ring, tx_info); + xdp_return_frame(xdpf); + + tx_pkts++; + total_done += tx_info->tx_descs; + tx_ring->free_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + tx_ring->ring_size); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d pkt #%d req_id %d\n", tx_ring->qid, tx_pkts, req_id); + } + + tx_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + tx_ring->qid, tx_pkts); + + return tx_pkts; +} + +/* This is the XDP napi callback. XDP queues use a separate napi callback + * than Rx/Tx queues. + */ +int ena_xdp_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + struct ena_ring *tx_ring; + u32 work_done; + int ret; + + tx_ring = ena_napi->tx_ring; + + if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + work_done = ena_clean_xdp_irq(tx_ring, budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule + */ + if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags))) { + napi_complete_done(napi, 0); + ret = 0; + } else if (budget > work_done) { + ena_increase_stat(&tx_ring->tx_stats.napi_comp, 1, + &tx_ring->syncp); + if (napi_complete_done(napi, work_done)) + ena_unmask_interrupt(tx_ring, NULL); + + ena_update_ring_numa_node(tx_ring, NULL); + ret = work_done; + } else { + ret = budget; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.tx_poll++; + u64_stats_update_end(&tx_ring->syncp); + tx_ring->tx_stats.last_napi_jiffies = jiffies; + + return ret; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.h b/drivers/net/ethernet/amazon/ena/ena_xdp.h new file mode 100644 index 000000000000..cfd82728486a --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_xdp.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef ENA_XDP_H +#define ENA_XDP_H + +#include "ena_netdev.h" +#include <linux/bpf_trace.h> + +/* The max MTU size is configured to be the ethernet frame size without + * the overhead of the ethernet header, which can have a VLAN header, and + * a frame check sequence (FCS). + * The buffer size we share with the device is defined to be ENA_PAGE_SIZE + */ +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ + VLAN_HLEN - XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ + ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) + +enum ENA_XDP_ACTIONS { + ENA_XDP_PASS = 0, + ENA_XDP_TX = BIT(0), + ENA_XDP_REDIRECT = BIT(1), + ENA_XDP_DROP = BIT(2) +}; + +#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) + +int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter); +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, int count); +int ena_xdp_io_poll(struct napi_struct *napi, int budget); +int ena_xdp_xmit_frame(struct ena_ring *tx_ring, + struct ena_adapter *adapter, + struct xdp_frame *xdpf, + int flags); +int ena_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags); +int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf); +int ena_xdp_register_rxq_info(struct ena_ring *rx_ring); +void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring); + +enum ena_xdp_errors_t { + ENA_XDP_ALLOWED = 0, + ENA_XDP_CURRENT_MTU_TOO_LARGE, + ENA_XDP_NO_ENOUGH_QUEUES, +}; + +static inline bool ena_xdp_present(struct ena_adapter *adapter) +{ + return !!adapter->xdp_bpf_prog; +} + +static inline bool ena_xdp_present_ring(struct ena_ring *ring) +{ + return !!ring->xdp_bpf_prog; +} + +static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter, + u32 queues) +{ + return 2 * queues <= adapter->max_num_io_queues; +} + +static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +{ + enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; + + if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) + rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; + else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + rc = ENA_XDP_NO_ENOUGH_QUEUES; + + return rc; +} + +static inline int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + u32 verdict = ENA_XDP_PASS; + struct bpf_prog *xdp_prog; + struct ena_ring *xdp_ring; + struct xdp_frame *xdpf; + u64 *xdp_stat; + + xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + switch (verdict) { + case XDP_TX: + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; + break; + } + + /* Find xmit queue */ + xdp_ring = rx_ring->xdp_ring; + + /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ + spin_lock(&xdp_ring->xdp_tx_lock); + + if (ena_xdp_xmit_frame(xdp_ring, rx_ring->adapter, xdpf, + XDP_XMIT_FLUSH)) + xdp_return_frame(xdpf); + + spin_unlock(&xdp_ring->xdp_tx_lock); + xdp_stat = &rx_ring->rx_stats.xdp_tx; + verdict = ENA_XDP_TX; + break; + case XDP_REDIRECT: + if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { + xdp_stat = &rx_ring->rx_stats.xdp_redirect; + verdict = ENA_XDP_REDIRECT; + break; + } + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; + break; + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; + break; + case XDP_DROP: + xdp_stat = &rx_ring->rx_stats.xdp_drop; + verdict = ENA_XDP_DROP; + break; + case XDP_PASS: + xdp_stat = &rx_ring->rx_stats.xdp_pass; + verdict = ENA_XDP_PASS; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_invalid; + verdict = ENA_XDP_DROP; + } + + ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); + + return verdict; +} +#endif /* ENA_XDP_H */ diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 5935be190b9e..5f2a6fcba967 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -866,10 +866,13 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter) netdev_err(adapter->netdev, "offset(%d) > ring size(%d) !!\n", offset, adapter->ring_size); err = -1; - goto failed; + goto free_buffer; } return 0; +free_buffer: + kfree(tx_ring->tx_buffer); + tx_ring->tx_buffer = NULL; failed: if (adapter->ring_vir_addr != NULL) { dma_free_coherent(&pdev->dev, adapter->ring_size, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1f956929191d..8f10ab4d4a43 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4199,13 +4199,22 @@ static void bnxt_init_vnics(struct bnxt *bp) vnic->fw_l2_ctx_id = INVALID_HW_RING_ID; if (bp->vnic_info[i].rss_hash_key) { - if (i == 0) + if (!i) { + u8 *key = (void *)vnic->rss_hash_key; + int k; + + bp->toeplitz_prefix = 0; get_random_bytes(vnic->rss_hash_key, HW_HASH_KEY_SIZE); - else + for (k = 0; k < 8; k++) { + bp->toeplitz_prefix <<= 8; + bp->toeplitz_prefix |= key[k]; + } + } else { memcpy(vnic->rss_hash_key, bp->vnic_info[0].rss_hash_key, HW_HASH_KEY_SIZE); + } } } } @@ -4789,9 +4798,8 @@ static void bnxt_clear_ring_indices(struct bnxt *bp) } } -static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool irq_reinit) +static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all) { -#ifdef CONFIG_RFS_ACCEL int i; /* Under rtnl_lock and all our NAPIs have been disabled. It's @@ -4803,40 +4811,73 @@ static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool irq_reinit) struct bnxt_ntuple_filter *fltr; head = &bp->ntp_fltr_hash_tbl[i]; - hlist_for_each_entry_safe(fltr, tmp, head, hash) { - hlist_del(&fltr->hash); + hlist_for_each_entry_safe(fltr, tmp, head, base.hash) { + bnxt_del_l2_filter(bp, fltr->l2_fltr); + if (!all && (fltr->base.flags & BNXT_ACT_FUNC_DST)) + continue; + hlist_del(&fltr->base.hash); + clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap); + bp->ntp_fltr_count--; kfree(fltr); } } - if (irq_reinit) { - bitmap_free(bp->ntp_fltr_bmap); - bp->ntp_fltr_bmap = NULL; - } + if (!all) + return; + + bitmap_free(bp->ntp_fltr_bmap); + bp->ntp_fltr_bmap = NULL; bp->ntp_fltr_count = 0; -#endif } static int bnxt_alloc_ntp_fltrs(struct bnxt *bp) { -#ifdef CONFIG_RFS_ACCEL int i, rc = 0; - if (!(bp->flags & BNXT_FLAG_RFS)) + if (!(bp->flags & BNXT_FLAG_RFS) || bp->ntp_fltr_bmap) return 0; for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) INIT_HLIST_HEAD(&bp->ntp_fltr_hash_tbl[i]); bp->ntp_fltr_count = 0; - bp->ntp_fltr_bmap = bitmap_zalloc(BNXT_NTP_FLTR_MAX_FLTR, GFP_KERNEL); + bp->ntp_fltr_bmap = bitmap_zalloc(BNXT_MAX_FLTR, GFP_KERNEL); if (!bp->ntp_fltr_bmap) rc = -ENOMEM; return rc; -#else - return 0; -#endif +} + +static void bnxt_free_l2_filters(struct bnxt *bp, bool all) +{ + int i; + + for (i = 0; i < BNXT_L2_FLTR_HASH_SIZE; i++) { + struct hlist_head *head; + struct hlist_node *tmp; + struct bnxt_l2_filter *fltr; + + head = &bp->l2_fltr_hash_tbl[i]; + hlist_for_each_entry_safe(fltr, tmp, head, base.hash) { + if (!all && (fltr->base.flags & BNXT_ACT_FUNC_DST)) + continue; + hlist_del(&fltr->base.hash); + if (fltr->base.flags) { + clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap); + bp->ntp_fltr_count--; + } + kfree(fltr); + } + } +} + +static void bnxt_init_l2_fltr_tbl(struct bnxt *bp) +{ + int i; + + for (i = 0; i < BNXT_L2_FLTR_HASH_SIZE; i++) + INIT_HLIST_HEAD(&bp->l2_fltr_hash_tbl[i]); + get_random_bytes(&bp->hash_seed, sizeof(bp->hash_seed)); } static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) @@ -4846,7 +4887,8 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_rx_rings(bp); bnxt_free_cp_rings(bp); bnxt_free_all_cp_arrays(bp); - bnxt_free_ntp_fltrs(bp, irq_re_init); + bnxt_free_ntp_fltrs(bp, false); + bnxt_free_l2_filters(bp, false); if (irq_re_init) { bnxt_free_ring_stats(bp); if (!(bp->phy_flags & BNXT_PHY_FL_PORT_STATS_NO_RESET) || @@ -5290,25 +5332,301 @@ static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id) return hwrm_req_send_silent(bp, req); } +void bnxt_del_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr) +{ + if (!atomic_dec_and_test(&fltr->refcnt)) + return; + spin_lock_bh(&bp->ntp_fltr_lock); + if (!test_and_clear_bit(BNXT_FLTR_INSERTED, &fltr->base.state)) { + spin_unlock_bh(&bp->ntp_fltr_lock); + return; + } + hlist_del_rcu(&fltr->base.hash); + if (fltr->base.flags) { + clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap); + bp->ntp_fltr_count--; + } + spin_unlock_bh(&bp->ntp_fltr_lock); + kfree_rcu(fltr, base.rcu); +} + +static struct bnxt_l2_filter *__bnxt_lookup_l2_filter(struct bnxt *bp, + struct bnxt_l2_key *key, + u32 idx) +{ + struct hlist_head *head = &bp->l2_fltr_hash_tbl[idx]; + struct bnxt_l2_filter *fltr; + + hlist_for_each_entry_rcu(fltr, head, base.hash) { + struct bnxt_l2_key *l2_key = &fltr->l2_key; + + if (ether_addr_equal(l2_key->dst_mac_addr, key->dst_mac_addr) && + l2_key->vlan == key->vlan) + return fltr; + } + return NULL; +} + +static struct bnxt_l2_filter *bnxt_lookup_l2_filter(struct bnxt *bp, + struct bnxt_l2_key *key, + u32 idx) +{ + struct bnxt_l2_filter *fltr = NULL; + + rcu_read_lock(); + fltr = __bnxt_lookup_l2_filter(bp, key, idx); + if (fltr) + atomic_inc(&fltr->refcnt); + rcu_read_unlock(); + return fltr; +} + +#define BNXT_IPV4_4TUPLE(bp, fkeys) \ + (((fkeys)->basic.ip_proto == IPPROTO_TCP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4) || \ + ((fkeys)->basic.ip_proto == IPPROTO_UDP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4)) + +#define BNXT_IPV6_4TUPLE(bp, fkeys) \ + (((fkeys)->basic.ip_proto == IPPROTO_TCP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6) || \ + ((fkeys)->basic.ip_proto == IPPROTO_UDP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6)) + +static u32 bnxt_get_rss_flow_tuple_len(struct bnxt *bp, struct flow_keys *fkeys) +{ + if (fkeys->basic.n_proto == htons(ETH_P_IP)) { + if (BNXT_IPV4_4TUPLE(bp, fkeys)) + return sizeof(fkeys->addrs.v4addrs) + + sizeof(fkeys->ports); + + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4) + return sizeof(fkeys->addrs.v4addrs); + } + + if (fkeys->basic.n_proto == htons(ETH_P_IPV6)) { + if (BNXT_IPV6_4TUPLE(bp, fkeys)) + return sizeof(fkeys->addrs.v6addrs) + + sizeof(fkeys->ports); + + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) + return sizeof(fkeys->addrs.v6addrs); + } + + return 0; +} + +static u32 bnxt_toeplitz(struct bnxt *bp, struct flow_keys *fkeys, + const unsigned char *key) +{ + u64 prefix = bp->toeplitz_prefix, hash = 0; + struct bnxt_ipv4_tuple tuple4; + struct bnxt_ipv6_tuple tuple6; + int i, j, len = 0; + u8 *four_tuple; + + len = bnxt_get_rss_flow_tuple_len(bp, fkeys); + if (!len) + return 0; + + if (fkeys->basic.n_proto == htons(ETH_P_IP)) { + tuple4.v4addrs = fkeys->addrs.v4addrs; + tuple4.ports = fkeys->ports; + four_tuple = (unsigned char *)&tuple4; + } else { + tuple6.v6addrs = fkeys->addrs.v6addrs; + tuple6.ports = fkeys->ports; + four_tuple = (unsigned char *)&tuple6; + } + + for (i = 0, j = 8; i < len; i++, j++) { + u8 byte = four_tuple[i]; + int bit; + + for (bit = 0; bit < 8; bit++, prefix <<= 1, byte <<= 1) { + if (byte & 0x80) + hash ^= prefix; + } + prefix |= (j < HW_HASH_KEY_SIZE) ? key[j] : 0; + } + + /* The valid part of the hash is in the upper 32 bits. */ + return (hash >> 32) & BNXT_NTP_FLTR_HASH_MASK; +} + #ifdef CONFIG_RFS_ACCEL -static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, - struct bnxt_ntuple_filter *fltr) +static struct bnxt_l2_filter * +bnxt_lookup_l2_filter_from_key(struct bnxt *bp, struct bnxt_l2_key *key) +{ + struct bnxt_l2_filter *fltr; + u32 idx; + + idx = jhash2(&key->filter_key, BNXT_L2_KEY_SIZE, bp->hash_seed) & + BNXT_L2_FLTR_HASH_MASK; + fltr = bnxt_lookup_l2_filter(bp, key, idx); + return fltr; +} +#endif + +static int bnxt_init_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr, + struct bnxt_l2_key *key, u32 idx) +{ + struct hlist_head *head; + + ether_addr_copy(fltr->l2_key.dst_mac_addr, key->dst_mac_addr); + fltr->l2_key.vlan = key->vlan; + fltr->base.type = BNXT_FLTR_TYPE_L2; + if (fltr->base.flags) { + int bit_id; + + bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap, + BNXT_MAX_FLTR, 0); + if (bit_id < 0) + return -ENOMEM; + fltr->base.sw_id = (u16)bit_id; + } + head = &bp->l2_fltr_hash_tbl[idx]; + hlist_add_head_rcu(&fltr->base.hash, head); + set_bit(BNXT_FLTR_INSERTED, &fltr->base.state); + atomic_set(&fltr->refcnt, 1); + return 0; +} + +static struct bnxt_l2_filter *bnxt_alloc_l2_filter(struct bnxt *bp, + struct bnxt_l2_key *key, + gfp_t gfp) +{ + struct bnxt_l2_filter *fltr; + u32 idx; + int rc; + + idx = jhash2(&key->filter_key, BNXT_L2_KEY_SIZE, bp->hash_seed) & + BNXT_L2_FLTR_HASH_MASK; + fltr = bnxt_lookup_l2_filter(bp, key, idx); + if (fltr) + return fltr; + + fltr = kzalloc(sizeof(*fltr), gfp); + if (!fltr) + return ERR_PTR(-ENOMEM); + spin_lock_bh(&bp->ntp_fltr_lock); + rc = bnxt_init_l2_filter(bp, fltr, key, idx); + spin_unlock_bh(&bp->ntp_fltr_lock); + if (rc) { + bnxt_del_l2_filter(bp, fltr); + fltr = ERR_PTR(rc); + } + return fltr; +} + +static u16 bnxt_vf_target_id(struct bnxt_pf_info *pf, u16 vf_idx) +{ +#ifdef CONFIG_BNXT_SRIOV + struct bnxt_vf_info *vf = &pf->vf[vf_idx]; + + return vf->fw_fid; +#else + return INVALID_HW_RING_ID; +#endif +} + +int bnxt_hwrm_l2_filter_free(struct bnxt *bp, struct bnxt_l2_filter *fltr) +{ + struct hwrm_cfa_l2_filter_free_input *req; + u16 target_id = 0xffff; + int rc; + + if (fltr->base.flags & BNXT_ACT_FUNC_DST) { + struct bnxt_pf_info *pf = &bp->pf; + + if (fltr->base.vf_idx >= pf->active_vfs) + return -EINVAL; + + target_id = bnxt_vf_target_id(pf, fltr->base.vf_idx); + if (target_id == INVALID_HW_RING_ID) + return -EINVAL; + } + + rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE); + if (rc) + return rc; + + req->target_id = cpu_to_le16(target_id); + req->l2_filter_id = fltr->base.filter_id; + return hwrm_req_send(bp, req); +} + +int bnxt_hwrm_l2_filter_alloc(struct bnxt *bp, struct bnxt_l2_filter *fltr) +{ + struct hwrm_cfa_l2_filter_alloc_output *resp; + struct hwrm_cfa_l2_filter_alloc_input *req; + u16 target_id = 0xffff; + int rc; + + if (fltr->base.flags & BNXT_ACT_FUNC_DST) { + struct bnxt_pf_info *pf = &bp->pf; + + if (fltr->base.vf_idx >= pf->active_vfs) + return -EINVAL; + + target_id = bnxt_vf_target_id(pf, fltr->base.vf_idx); + } + rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_ALLOC); + if (rc) + return rc; + + req->target_id = cpu_to_le16(target_id); + req->flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX); + + if (!BNXT_CHIP_TYPE_NITRO_A0(bp)) + req->flags |= + cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST); + req->dst_id = cpu_to_le16(fltr->base.fw_vnic_id); + req->enables = + cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK); + ether_addr_copy(req->l2_addr, fltr->l2_key.dst_mac_addr); + eth_broadcast_addr(req->l2_addr_mask); + + if (fltr->l2_key.vlan) { + req->enables |= + cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN_MASK | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_NUM_VLANS); + req->num_vlans = 1; + req->l2_ivlan = cpu_to_le16(fltr->l2_key.vlan); + req->l2_ivlan_mask = cpu_to_le16(0xfff); + } + + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send(bp, req); + if (!rc) { + fltr->base.filter_id = resp->l2_filter_id; + set_bit(BNXT_FLTR_VALID, &fltr->base.state); + } + hwrm_req_drop(bp, req); + return rc; +} + +int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr) { struct hwrm_cfa_ntuple_filter_free_input *req; int rc; + set_bit(BNXT_FLTR_FW_DELETED, &fltr->base.state); rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_FREE); if (rc) return rc; - req->ntuple_filter_id = fltr->filter_id; + req->ntuple_filter_id = fltr->base.filter_id; return hwrm_req_send(bp, req); } #define BNXT_NTP_FLTR_FLAGS \ (CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_L2_FILTER_ID | \ CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE | \ - CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR | \ CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE | \ CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR | \ CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR_MASK | \ @@ -5324,12 +5642,21 @@ static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, #define BNXT_NTP_TUNNEL_FLTR_FLAG \ CFA_NTUPLE_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE -static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, - struct bnxt_ntuple_filter *fltr) +void bnxt_fill_ipv6_mask(__be32 mask[4]) +{ + int i; + + for (i = 0; i < 4; i++) + mask[i] = cpu_to_be32(~0); +} + +int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr) { struct hwrm_cfa_ntuple_filter_alloc_output *resp; struct hwrm_cfa_ntuple_filter_alloc_input *req; struct flow_keys *keys = &fltr->fkeys; + struct bnxt_l2_filter *l2_fltr; struct bnxt_vnic_info *vnic; u32 flags = 0; int rc; @@ -5338,42 +5665,47 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, if (rc) return rc; - req->l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx]; + l2_fltr = fltr->l2_fltr; + req->l2_filter_id = l2_fltr->base.filter_id; + if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) { flags = CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX; - req->dst_id = cpu_to_le16(fltr->rxq); + req->dst_id = cpu_to_le16(fltr->base.rxq); } else { - vnic = &bp->vnic_info[fltr->rxq + 1]; + vnic = &bp->vnic_info[fltr->base.rxq + 1]; req->dst_id = cpu_to_le16(vnic->fw_vnic_id); } req->flags = cpu_to_le32(flags); req->enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS); req->ethertype = htons(ETH_P_IP); - memcpy(req->src_macaddr, fltr->src_mac_addr, ETH_ALEN); req->ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4; req->ip_protocol = keys->basic.ip_proto; if (keys->basic.n_proto == htons(ETH_P_IPV6)) { - int i; - req->ethertype = htons(ETH_P_IPV6); req->ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6; - *(struct in6_addr *)&req->src_ipaddr[0] = - keys->addrs.v6addrs.src; - *(struct in6_addr *)&req->dst_ipaddr[0] = - keys->addrs.v6addrs.dst; - for (i = 0; i < 4; i++) { - req->src_ipaddr_mask[i] = cpu_to_be32(0xffffffff); - req->dst_ipaddr_mask[i] = cpu_to_be32(0xffffffff); + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) { + *(struct in6_addr *)&req->src_ipaddr[0] = + keys->addrs.v6addrs.src; + bnxt_fill_ipv6_mask(req->src_ipaddr_mask); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) { + *(struct in6_addr *)&req->dst_ipaddr[0] = + keys->addrs.v6addrs.dst; + bnxt_fill_ipv6_mask(req->dst_ipaddr_mask); } } else { - req->src_ipaddr[0] = keys->addrs.v4addrs.src; - req->src_ipaddr_mask[0] = cpu_to_be32(0xffffffff); - req->dst_ipaddr[0] = keys->addrs.v4addrs.dst; - req->dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff); + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) { + req->src_ipaddr[0] = keys->addrs.v4addrs.src; + req->src_ipaddr_mask[0] = cpu_to_be32(0xffffffff); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) { + req->dst_ipaddr[0] = keys->addrs.v4addrs.dst; + req->dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff); + } } if (keys->control.flags & FLOW_DIS_ENCAPSULATION) { req->enables |= cpu_to_le32(BNXT_NTP_TUNNEL_FLTR_FLAG); @@ -5381,79 +5713,63 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL; } - req->src_port = keys->ports.src; - req->src_port_mask = cpu_to_be16(0xffff); - req->dst_port = keys->ports.dst; - req->dst_port_mask = cpu_to_be16(0xffff); + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) { + req->src_port = keys->ports.src; + req->src_port_mask = cpu_to_be16(0xffff); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) { + req->dst_port = keys->ports.dst; + req->dst_port_mask = cpu_to_be16(0xffff); + } resp = hwrm_req_hold(bp, req); rc = hwrm_req_send(bp, req); if (!rc) - fltr->filter_id = resp->ntuple_filter_id; + fltr->base.filter_id = resp->ntuple_filter_id; hwrm_req_drop(bp, req); return rc; } -#endif static int bnxt_hwrm_set_vnic_filter(struct bnxt *bp, u16 vnic_id, u16 idx, const u8 *mac_addr) { - struct hwrm_cfa_l2_filter_alloc_output *resp; - struct hwrm_cfa_l2_filter_alloc_input *req; + struct bnxt_l2_filter *fltr; + struct bnxt_l2_key key; int rc; - rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_ALLOC); - if (rc) - return rc; - - req->flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX); - if (!BNXT_CHIP_TYPE_NITRO_A0(bp)) - req->flags |= - cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST); - req->dst_id = cpu_to_le16(bp->vnic_info[vnic_id].fw_vnic_id); - req->enables = - cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR | - CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID | - CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK); - memcpy(req->l2_addr, mac_addr, ETH_ALEN); - req->l2_addr_mask[0] = 0xff; - req->l2_addr_mask[1] = 0xff; - req->l2_addr_mask[2] = 0xff; - req->l2_addr_mask[3] = 0xff; - req->l2_addr_mask[4] = 0xff; - req->l2_addr_mask[5] = 0xff; + ether_addr_copy(key.dst_mac_addr, mac_addr); + key.vlan = 0; + fltr = bnxt_alloc_l2_filter(bp, &key, GFP_KERNEL); + if (IS_ERR(fltr)) + return PTR_ERR(fltr); - resp = hwrm_req_hold(bp, req); - rc = hwrm_req_send(bp, req); - if (!rc) - bp->vnic_info[vnic_id].fw_l2_filter_id[idx] = - resp->l2_filter_id; - hwrm_req_drop(bp, req); + fltr->base.fw_vnic_id = bp->vnic_info[vnic_id].fw_vnic_id; + rc = bnxt_hwrm_l2_filter_alloc(bp, fltr); + if (rc) + bnxt_del_l2_filter(bp, fltr); + else + bp->vnic_info[vnic_id].l2_filters[idx] = fltr; return rc; } static int bnxt_hwrm_clear_vnic_filter(struct bnxt *bp) { - struct hwrm_cfa_l2_filter_free_input *req; u16 i, j, num_of_vnics = 1; /* only vnic 0 supported */ - int rc; + int rc = 0; /* Any associated ntuple filters will also be cleared by firmware. */ - rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE); - if (rc) - return rc; - hwrm_req_hold(bp, req); for (i = 0; i < num_of_vnics; i++) { struct bnxt_vnic_info *vnic = &bp->vnic_info[i]; for (j = 0; j < vnic->uc_filter_count; j++) { - req->l2_filter_id = vnic->fw_l2_filter_id[j]; + struct bnxt_l2_filter *fltr = vnic->l2_filters[j]; - rc = hwrm_req_send(bp, req); + bnxt_hwrm_l2_filter_free(bp, fltr); + bnxt_del_l2_filter(bp, fltr); } vnic->uc_filter_count = 0; } - hwrm_req_drop(bp, req); + return rc; } @@ -9370,7 +9686,6 @@ static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) static int bnxt_alloc_rfs_vnics(struct bnxt *bp) { -#ifdef CONFIG_RFS_ACCEL int i, rc = 0; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) @@ -9399,9 +9714,6 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) break; } return rc; -#else - return 0; -#endif } /* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */ @@ -9729,7 +10041,6 @@ static int bnxt_setup_int_mode(struct bnxt *bp) return rc; } -#ifdef CONFIG_RFS_ACCEL static unsigned int bnxt_get_max_func_rss_ctxs(struct bnxt *bp) { return bp->hw_resc.max_rsscos_ctxs; @@ -9739,7 +10050,6 @@ static unsigned int bnxt_get_max_func_vnics(struct bnxt *bp) { return bp->hw_resc.max_vnics; } -#endif unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp) { @@ -11742,7 +12052,6 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp) { struct net_device *dev = bp->dev; struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; - struct hwrm_cfa_l2_filter_free_input *req; struct netdev_hw_addr *ha; int i, off = 0, rc; bool uc_update; @@ -11754,16 +12063,12 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp) if (!uc_update) goto skip_uc; - rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE); - if (rc) - return rc; - hwrm_req_hold(bp, req); for (i = 1; i < vnic->uc_filter_count; i++) { - req->l2_filter_id = vnic->fw_l2_filter_id[i]; + struct bnxt_l2_filter *fltr = vnic->l2_filters[i]; - rc = hwrm_req_send(bp, req); + bnxt_hwrm_l2_filter_free(bp, fltr); + bnxt_del_l2_filter(bp, fltr); } - hwrm_req_drop(bp, req); vnic->uc_filter_count = 1; @@ -11858,7 +12163,6 @@ static bool bnxt_rfs_supported(struct bnxt *bp) /* If runtime conditions support RFS */ static bool bnxt_rfs_capable(struct bnxt *bp) { -#ifdef CONFIG_RFS_ACCEL int vnics, max_vnics, max_rss_ctxs; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) @@ -11894,9 +12198,6 @@ static bool bnxt_rfs_capable(struct bnxt *bp) netdev_warn(bp->dev, "Unable to reserve resources to support NTUPLE filters.\n"); bnxt_hwrm_reserve_rings(bp, 0, 0, 0, 0, 0, 1); return false; -#else - return false; -#endif } static netdev_features_t bnxt_fix_features(struct net_device *dev, @@ -13559,38 +13860,102 @@ static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -#ifdef CONFIG_RFS_ACCEL +u32 bnxt_get_ntp_filter_idx(struct bnxt *bp, struct flow_keys *fkeys, + const struct sk_buff *skb) +{ + struct bnxt_vnic_info *vnic; + + if (skb) + return skb_get_hash_raw(skb) & BNXT_NTP_FLTR_HASH_MASK; + + vnic = &bp->vnic_info[0]; + return bnxt_toeplitz(bp, fkeys, (void *)vnic->rss_hash_key); +} + +int bnxt_insert_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr, + u32 idx) +{ + struct hlist_head *head; + int bit_id; + + spin_lock_bh(&bp->ntp_fltr_lock); + bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap, BNXT_MAX_FLTR, 0); + if (bit_id < 0) { + spin_unlock_bh(&bp->ntp_fltr_lock); + return -ENOMEM; + } + + fltr->base.sw_id = (u16)bit_id; + fltr->base.type = BNXT_FLTR_TYPE_NTUPLE; + fltr->base.flags |= BNXT_ACT_RING_DST; + head = &bp->ntp_fltr_hash_tbl[idx]; + hlist_add_head_rcu(&fltr->base.hash, head); + set_bit(BNXT_FLTR_INSERTED, &fltr->base.state); + bp->ntp_fltr_count++; + spin_unlock_bh(&bp->ntp_fltr_lock); + return 0; +} + static bool bnxt_fltr_match(struct bnxt_ntuple_filter *f1, struct bnxt_ntuple_filter *f2) { struct flow_keys *keys1 = &f1->fkeys; struct flow_keys *keys2 = &f2->fkeys; + if (f1->ntuple_flags != f2->ntuple_flags) + return false; + if (keys1->basic.n_proto != keys2->basic.n_proto || keys1->basic.ip_proto != keys2->basic.ip_proto) return false; if (keys1->basic.n_proto == htons(ETH_P_IP)) { - if (keys1->addrs.v4addrs.src != keys2->addrs.v4addrs.src || - keys1->addrs.v4addrs.dst != keys2->addrs.v4addrs.dst) + if (((f1->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) && + keys1->addrs.v4addrs.src != keys2->addrs.v4addrs.src) || + ((f1->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) && + keys1->addrs.v4addrs.dst != keys2->addrs.v4addrs.dst)) return false; } else { - if (memcmp(&keys1->addrs.v6addrs.src, &keys2->addrs.v6addrs.src, - sizeof(keys1->addrs.v6addrs.src)) || - memcmp(&keys1->addrs.v6addrs.dst, &keys2->addrs.v6addrs.dst, - sizeof(keys1->addrs.v6addrs.dst))) + if (((f1->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) && + memcmp(&keys1->addrs.v6addrs.src, + &keys2->addrs.v6addrs.src, + sizeof(keys1->addrs.v6addrs.src))) || + ((f1->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) && + memcmp(&keys1->addrs.v6addrs.dst, + &keys2->addrs.v6addrs.dst, + sizeof(keys1->addrs.v6addrs.dst)))) return false; } - if (keys1->ports.ports == keys2->ports.ports && - keys1->control.flags == keys2->control.flags && - ether_addr_equal(f1->src_mac_addr, f2->src_mac_addr) && - ether_addr_equal(f1->dst_mac_addr, f2->dst_mac_addr)) + if (((f1->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) && + keys1->ports.src != keys2->ports.src) || + ((f1->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) && + keys1->ports.dst != keys2->ports.dst)) + return false; + + if (keys1->control.flags == keys2->control.flags && + f1->l2_fltr == f2->l2_fltr) return true; return false; } +struct bnxt_ntuple_filter * +bnxt_lookup_ntp_filter_from_idx(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr, u32 idx) +{ + struct bnxt_ntuple_filter *f; + struct hlist_head *head; + + head = &bp->ntp_fltr_hash_tbl[idx]; + hlist_for_each_entry_rcu(f, head, base.hash) { + if (bnxt_fltr_match(f, fltr)) + return f; + } + return NULL; +} + +#ifdef CONFIG_RFS_ACCEL static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { @@ -13598,29 +13963,31 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, struct bnxt_ntuple_filter *fltr, *new_fltr; struct flow_keys *fkeys; struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb); - int rc = 0, idx, bit_id, l2_idx = 0; - struct hlist_head *head; + struct bnxt_l2_filter *l2_fltr; + int rc = 0, idx; u32 flags; - if (!ether_addr_equal(dev->dev_addr, eth->h_dest)) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; - int off = 0, j; + if (ether_addr_equal(dev->dev_addr, eth->h_dest)) { + l2_fltr = bp->vnic_info[0].l2_filters[0]; + atomic_inc(&l2_fltr->refcnt); + } else { + struct bnxt_l2_key key; - netif_addr_lock_bh(dev); - for (j = 0; j < vnic->uc_filter_count; j++, off += ETH_ALEN) { - if (ether_addr_equal(eth->h_dest, - vnic->uc_list + off)) { - l2_idx = j + 1; - break; - } - } - netif_addr_unlock_bh(dev); - if (!l2_idx) + ether_addr_copy(key.dst_mac_addr, eth->h_dest); + key.vlan = 0; + l2_fltr = bnxt_lookup_l2_filter_from_key(bp, &key); + if (!l2_fltr) return -EINVAL; + if (l2_fltr->base.flags & BNXT_ACT_FUNC_DST) { + bnxt_del_l2_filter(bp, l2_fltr); + return -EINVAL; + } } new_fltr = kzalloc(sizeof(*new_fltr), GFP_ATOMIC); - if (!new_fltr) + if (!new_fltr) { + bnxt_del_l2_filter(bp, l2_fltr); return -ENOMEM; + } fkeys = &new_fltr->fkeys; if (!skb_flow_dissect_flow_keys(skb, fkeys, 0)) { @@ -13647,49 +14014,52 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, goto err_free; } - memcpy(new_fltr->dst_mac_addr, eth->h_dest, ETH_ALEN); - memcpy(new_fltr->src_mac_addr, eth->h_source, ETH_ALEN); + new_fltr->l2_fltr = l2_fltr; + new_fltr->ntuple_flags = BNXT_NTUPLE_MATCH_ALL; - idx = skb_get_hash_raw(skb) & BNXT_NTP_FLTR_HASH_MASK; - head = &bp->ntp_fltr_hash_tbl[idx]; + idx = bnxt_get_ntp_filter_idx(bp, fkeys, skb); rcu_read_lock(); - hlist_for_each_entry_rcu(fltr, head, hash) { - if (bnxt_fltr_match(fltr, new_fltr)) { - rc = fltr->sw_id; - rcu_read_unlock(); - goto err_free; - } - } - rcu_read_unlock(); - - spin_lock_bh(&bp->ntp_fltr_lock); - bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap, - BNXT_NTP_FLTR_MAX_FLTR, 0); - if (bit_id < 0) { - spin_unlock_bh(&bp->ntp_fltr_lock); - rc = -ENOMEM; + fltr = bnxt_lookup_ntp_filter_from_idx(bp, new_fltr, idx); + if (fltr) { + rcu_read_unlock(); + rc = fltr->base.sw_id; goto err_free; } + rcu_read_unlock(); - new_fltr->sw_id = (u16)bit_id; new_fltr->flow_id = flow_id; - new_fltr->l2_fltr_idx = l2_idx; - new_fltr->rxq = rxq_index; - hlist_add_head_rcu(&new_fltr->hash, head); - bp->ntp_fltr_count++; - spin_unlock_bh(&bp->ntp_fltr_lock); - - bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT); - - return new_fltr->sw_id; + new_fltr->base.rxq = rxq_index; + rc = bnxt_insert_ntp_filter(bp, new_fltr, idx); + if (!rc) { + bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT); + return new_fltr->base.sw_id; + } err_free: + bnxt_del_l2_filter(bp, l2_fltr); kfree(new_fltr); return rc; } +#endif + +void bnxt_del_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr) +{ + spin_lock_bh(&bp->ntp_fltr_lock); + if (!test_and_clear_bit(BNXT_FLTR_INSERTED, &fltr->base.state)) { + spin_unlock_bh(&bp->ntp_fltr_lock); + return; + } + hlist_del_rcu(&fltr->base.hash); + bp->ntp_fltr_count--; + spin_unlock_bh(&bp->ntp_fltr_lock); + bnxt_del_l2_filter(bp, fltr->l2_fltr); + clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap); + kfree_rcu(fltr, base.rcu); +} static void bnxt_cfg_ntp_filters(struct bnxt *bp) { +#ifdef CONFIG_RFS_ACCEL int i; for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) { @@ -13699,13 +14069,15 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) int rc; head = &bp->ntp_fltr_hash_tbl[i]; - hlist_for_each_entry_safe(fltr, tmp, head, hash) { + hlist_for_each_entry_safe(fltr, tmp, head, base.hash) { bool del = false; - if (test_bit(BNXT_FLTR_VALID, &fltr->state)) { - if (rps_may_expire_flow(bp->dev, fltr->rxq, + if (test_bit(BNXT_FLTR_VALID, &fltr->base.state)) { + if (fltr->base.flags & BNXT_ACT_NO_AGING) + continue; + if (rps_may_expire_flow(bp->dev, fltr->base.rxq, fltr->flow_id, - fltr->sw_id)) { + fltr->base.sw_id)) { bnxt_hwrm_cfa_ntuple_filter_free(bp, fltr); del = true; @@ -13716,32 +14088,18 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) if (rc) del = true; else - set_bit(BNXT_FLTR_VALID, &fltr->state); + set_bit(BNXT_FLTR_VALID, &fltr->base.state); } - if (del) { - spin_lock_bh(&bp->ntp_fltr_lock); - hlist_del_rcu(&fltr->hash); - bp->ntp_fltr_count--; - spin_unlock_bh(&bp->ntp_fltr_lock); - synchronize_rcu(); - clear_bit(fltr->sw_id, bp->ntp_fltr_bmap); - kfree(fltr); - } + if (del) + bnxt_del_ntp_filter(bp, fltr); } } if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event)) netdev_info(bp->dev, "Receive PF driver unload event!\n"); +#endif } -#else - -static void bnxt_cfg_ntp_filters(struct bnxt *bp) -{ -} - -#endif /* CONFIG_RFS_ACCEL */ - static int bnxt_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { @@ -13900,6 +14258,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_ptp_clear(bp); unregister_netdev(dev); + bnxt_free_l2_filters(bp, true); + bnxt_free_ntp_fltrs(bp, true); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); /* Flush any pending tasks */ cancel_work_sync(&bp->sp_task); @@ -14449,6 +14809,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + bnxt_init_l2_fltr_tbl(bp); bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d0f3e74fa025..b8ef1717cb65 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1219,7 +1219,7 @@ struct bnxt_vnic_info { u16 fw_rss_cos_lb_ctx[BNXT_MAX_CTX_PER_VNIC]; u16 fw_l2_ctx_id; #define BNXT_MAX_UC_ADDRS 4 - __le64 fw_l2_filter_id[BNXT_MAX_UC_ADDRS]; + struct bnxt_l2_filter *l2_filters[BNXT_MAX_UC_ADDRS]; /* index 0 always dev_addr */ u16 uc_filter_count; u8 *uc_list; @@ -1332,19 +1332,71 @@ struct bnxt_pf_info { struct bnxt_vf_info *vf; }; -struct bnxt_ntuple_filter { +struct bnxt_filter_base { struct hlist_node hash; - u8 dst_mac_addr[ETH_ALEN]; - u8 src_mac_addr[ETH_ALEN]; - struct flow_keys fkeys; __le64 filter_id; + u8 type; +#define BNXT_FLTR_TYPE_NTUPLE 1 +#define BNXT_FLTR_TYPE_L2 2 + u8 flags; +#define BNXT_ACT_DROP 1 +#define BNXT_ACT_RING_DST 2 +#define BNXT_ACT_FUNC_DST 4 +#define BNXT_ACT_NO_AGING 8 u16 sw_id; - u8 l2_fltr_idx; u16 rxq; - u32 flow_id; + u16 fw_vnic_id; + u16 vf_idx; unsigned long state; #define BNXT_FLTR_VALID 0 -#define BNXT_FLTR_UPDATE 1 +#define BNXT_FLTR_INSERTED 1 +#define BNXT_FLTR_FW_DELETED 2 + + struct rcu_head rcu; +}; + +struct bnxt_ntuple_filter { + struct bnxt_filter_base base; + struct flow_keys fkeys; + struct bnxt_l2_filter *l2_fltr; + u32 ntuple_flags; +#define BNXT_NTUPLE_MATCH_SRC_IP 1 +#define BNXT_NTUPLE_MATCH_DST_IP 2 +#define BNXT_NTUPLE_MATCH_SRC_PORT 4 +#define BNXT_NTUPLE_MATCH_DST_PORT 8 +#define BNXT_NTUPLE_MATCH_ALL (BNXT_NTUPLE_MATCH_SRC_IP | \ + BNXT_NTUPLE_MATCH_DST_IP | \ + BNXT_NTUPLE_MATCH_SRC_PORT | \ + BNXT_NTUPLE_MATCH_DST_PORT) + u32 flow_id; +}; + +struct bnxt_l2_key { + union { + struct { + u8 dst_mac_addr[ETH_ALEN]; + u16 vlan; + }; + u32 filter_key; + }; +}; + +struct bnxt_ipv4_tuple { + struct flow_dissector_key_ipv4_addrs v4addrs; + struct flow_dissector_key_ports ports; +}; + +struct bnxt_ipv6_tuple { + struct flow_dissector_key_ipv6_addrs v6addrs; + struct flow_dissector_key_ports ports; +}; + +#define BNXT_L2_KEY_SIZE (sizeof(struct bnxt_l2_key) / 4) + +struct bnxt_l2_filter { + struct bnxt_filter_base base; + struct bnxt_l2_key l2_key; + atomic_t refcnt; }; struct bnxt_link_info { @@ -2367,6 +2419,7 @@ struct bnxt { int db_size; #define BNXT_NTP_FLTR_MAX_FLTR 4096 +#define BNXT_MAX_FLTR (BNXT_NTP_FLTR_MAX_FLTR + BNXT_L2_FLTR_MAX_FLTR) #define BNXT_NTP_FLTR_HASH_SIZE 512 #define BNXT_NTP_FLTR_HASH_MASK (BNXT_NTP_FLTR_HASH_SIZE - 1) struct hlist_head ntp_fltr_hash_tbl[BNXT_NTP_FLTR_HASH_SIZE]; @@ -2375,6 +2428,14 @@ struct bnxt { unsigned long *ntp_fltr_bmap; int ntp_fltr_count; +#define BNXT_L2_FLTR_MAX_FLTR 1024 +#define BNXT_L2_FLTR_HASH_SIZE 32 +#define BNXT_L2_FLTR_HASH_MASK (BNXT_L2_FLTR_HASH_SIZE - 1) + struct hlist_head l2_fltr_hash_tbl[BNXT_L2_FLTR_HASH_SIZE]; + + u32 hash_seed; + u64 toeplitz_prefix; + /* To protect link related settings during link changes and * ethtool settings changes. */ @@ -2582,6 +2643,14 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); +void bnxt_del_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr); +int bnxt_hwrm_l2_filter_free(struct bnxt *bp, struct bnxt_l2_filter *fltr); +int bnxt_hwrm_l2_filter_alloc(struct bnxt *bp, struct bnxt_l2_filter *fltr); +int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr); +int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr); +void bnxt_fill_ipv6_mask(__be32 mask[4]); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id); int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); @@ -2625,6 +2694,13 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int bnxt_fw_init_one(struct bnxt *bp); bool bnxt_hwrm_reset_permitted(struct bnxt *bp); int bnxt_setup_mq_tc(struct net_device *dev, u8 tc); +struct bnxt_ntuple_filter *bnxt_lookup_ntp_filter_from_idx(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr, u32 idx); +u32 bnxt_get_ntp_filter_idx(struct bnxt *bp, struct flow_keys *fkeys, + const struct sk_buff *skb); +int bnxt_insert_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr, + u32 idx); +void bnxt_del_ntp_filter(struct bnxt *bp, struct bnxt_ntuple_filter *fltr); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); int bnxt_restore_pf_fw_resources(struct bnxt *bp); int bnxt_get_port_parent_id(struct net_device *dev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7e49953a93fa..5629ba9f4b2e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1011,29 +1011,60 @@ static int bnxt_set_channels(struct net_device *dev, return rc; } -#ifdef CONFIG_RFS_ACCEL -static int bnxt_grxclsrlall(struct bnxt *bp, struct ethtool_rxnfc *cmd, - u32 *rule_locs) +static u32 bnxt_get_all_fltr_ids_rcu(struct bnxt *bp, struct hlist_head tbl[], + int tbl_size, u32 *ids, u32 start, + u32 id_cnt) { - int i, j = 0; + int i, j = start; - cmd->data = bp->ntp_fltr_count; - for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) { + if (j >= id_cnt) + return j; + for (i = 0; i < tbl_size; i++) { struct hlist_head *head; - struct bnxt_ntuple_filter *fltr; + struct bnxt_filter_base *fltr; - head = &bp->ntp_fltr_hash_tbl[i]; - rcu_read_lock(); + head = &tbl[i]; hlist_for_each_entry_rcu(fltr, head, hash) { - if (j == cmd->rule_cnt) - break; - rule_locs[j++] = fltr->sw_id; + if (!fltr->flags || + test_bit(BNXT_FLTR_FW_DELETED, &fltr->state)) + continue; + ids[j++] = fltr->sw_id; + if (j == id_cnt) + return j; + } + } + return j; +} + +static struct bnxt_filter_base *bnxt_get_one_fltr_rcu(struct bnxt *bp, + struct hlist_head tbl[], + int tbl_size, u32 id) +{ + int i; + + for (i = 0; i < tbl_size; i++) { + struct hlist_head *head; + struct bnxt_filter_base *fltr; + + head = &tbl[i]; + hlist_for_each_entry_rcu(fltr, head, hash) { + if (fltr->flags && fltr->sw_id == id) + return fltr; } - rcu_read_unlock(); - if (j == cmd->rule_cnt) - break; } - cmd->rule_cnt = j; + return NULL; +} + +static int bnxt_grxclsrlall(struct bnxt *bp, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + cmd->data = bp->ntp_fltr_count; + rcu_read_lock(); + cmd->rule_cnt = bnxt_get_all_fltr_ids_rcu(bp, bp->ntp_fltr_hash_tbl, + BNXT_NTP_FLTR_HASH_SIZE, + rule_locs, 0, cmd->rule_cnt); + rcu_read_unlock(); + return 0; } @@ -1041,27 +1072,24 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd) { struct ethtool_rx_flow_spec *fs = (struct ethtool_rx_flow_spec *)&cmd->fs; + struct bnxt_filter_base *fltr_base; struct bnxt_ntuple_filter *fltr; struct flow_keys *fkeys; - int i, rc = -EINVAL; + int rc = -EINVAL; if (fs->location >= BNXT_NTP_FLTR_MAX_FLTR) return rc; - for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) { - struct hlist_head *head; - - head = &bp->ntp_fltr_hash_tbl[i]; - rcu_read_lock(); - hlist_for_each_entry_rcu(fltr, head, hash) { - if (fltr->sw_id == fs->location) - goto fltr_found; - } + rcu_read_lock(); + fltr_base = bnxt_get_one_fltr_rcu(bp, bp->ntp_fltr_hash_tbl, + BNXT_NTP_FLTR_HASH_SIZE, + fs->location); + if (!fltr_base) { rcu_read_unlock(); + return rc; } - return rc; + fltr = container_of(fltr_base, struct bnxt_ntuple_filter, base); -fltr_found: fkeys = &fltr->fkeys; if (fkeys->basic.n_proto == htons(ETH_P_IP)) { if (fkeys->basic.ip_proto == IPPROTO_TCP) @@ -1071,20 +1099,23 @@ fltr_found: else goto fltr_err; - fs->h_u.tcp_ip4_spec.ip4src = fkeys->addrs.v4addrs.src; - fs->m_u.tcp_ip4_spec.ip4src = cpu_to_be32(~0); - - fs->h_u.tcp_ip4_spec.ip4dst = fkeys->addrs.v4addrs.dst; - fs->m_u.tcp_ip4_spec.ip4dst = cpu_to_be32(~0); - - fs->h_u.tcp_ip4_spec.psrc = fkeys->ports.src; - fs->m_u.tcp_ip4_spec.psrc = cpu_to_be16(~0); - - fs->h_u.tcp_ip4_spec.pdst = fkeys->ports.dst; - fs->m_u.tcp_ip4_spec.pdst = cpu_to_be16(~0); + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) { + fs->h_u.tcp_ip4_spec.ip4src = fkeys->addrs.v4addrs.src; + fs->m_u.tcp_ip4_spec.ip4src = cpu_to_be32(~0); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) { + fs->h_u.tcp_ip4_spec.ip4dst = fkeys->addrs.v4addrs.dst; + fs->m_u.tcp_ip4_spec.ip4dst = cpu_to_be32(~0); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) { + fs->h_u.tcp_ip4_spec.psrc = fkeys->ports.src; + fs->m_u.tcp_ip4_spec.psrc = cpu_to_be16(~0); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) { + fs->h_u.tcp_ip4_spec.pdst = fkeys->ports.dst; + fs->m_u.tcp_ip4_spec.pdst = cpu_to_be16(~0); + } } else { - int i; - if (fkeys->basic.ip_proto == IPPROTO_TCP) fs->flow_type = TCP_V6_FLOW; else if (fkeys->basic.ip_proto == IPPROTO_UDP) @@ -1092,22 +1123,27 @@ fltr_found: else goto fltr_err; - *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6src[0] = - fkeys->addrs.v6addrs.src; - *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6dst[0] = - fkeys->addrs.v6addrs.dst; - for (i = 0; i < 4; i++) { - fs->m_u.tcp_ip6_spec.ip6src[i] = cpu_to_be32(~0); - fs->m_u.tcp_ip6_spec.ip6dst[i] = cpu_to_be32(~0); + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_IP) { + *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6src[0] = + fkeys->addrs.v6addrs.src; + bnxt_fill_ipv6_mask(fs->m_u.tcp_ip6_spec.ip6src); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_IP) { + *(struct in6_addr *)&fs->h_u.tcp_ip6_spec.ip6dst[0] = + fkeys->addrs.v6addrs.dst; + bnxt_fill_ipv6_mask(fs->m_u.tcp_ip6_spec.ip6dst); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_SRC_PORT) { + fs->h_u.tcp_ip6_spec.psrc = fkeys->ports.src; + fs->m_u.tcp_ip6_spec.psrc = cpu_to_be16(~0); + } + if (fltr->ntuple_flags & BNXT_NTUPLE_MATCH_DST_PORT) { + fs->h_u.tcp_ip6_spec.pdst = fkeys->ports.dst; + fs->m_u.tcp_ip6_spec.pdst = cpu_to_be16(~0); } - fs->h_u.tcp_ip6_spec.psrc = fkeys->ports.src; - fs->m_u.tcp_ip6_spec.psrc = cpu_to_be16(~0); - - fs->h_u.tcp_ip6_spec.pdst = fkeys->ports.dst; - fs->m_u.tcp_ip6_spec.pdst = cpu_to_be16(~0); } - fs->ring_cookie = fltr->rxq; + fs->ring_cookie = fltr->base.rxq; rc = 0; fltr_err: @@ -1115,7 +1151,220 @@ fltr_err: return rc; } -#endif + +#define IPV4_ALL_MASK ((__force __be32)~0) +#define L4_PORT_ALL_MASK ((__force __be16)~0) + +static bool ipv6_mask_is_full(__be32 mask[4]) +{ + return (mask[0] & mask[1] & mask[2] & mask[3]) == IPV4_ALL_MASK; +} + +static bool ipv6_mask_is_zero(__be32 mask[4]) +{ + return !(mask[0] | mask[1] | mask[2] | mask[3]); +} + +static int bnxt_add_ntuple_cls_rule(struct bnxt *bp, + struct ethtool_rx_flow_spec *fs) +{ + u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); + struct bnxt_ntuple_filter *new_fltr, *fltr; + struct bnxt_l2_filter *l2_fltr; + u32 flow_type = fs->flow_type; + struct flow_keys *fkeys; + u32 idx; + int rc; + + if (!bp->vnic_info) + return -EAGAIN; + + if ((flow_type & (FLOW_MAC_EXT | FLOW_EXT)) || vf) + return -EOPNOTSUPP; + + new_fltr = kzalloc(sizeof(*new_fltr), GFP_KERNEL); + if (!new_fltr) + return -ENOMEM; + + l2_fltr = bp->vnic_info[0].l2_filters[0]; + atomic_inc(&l2_fltr->refcnt); + new_fltr->l2_fltr = l2_fltr; + fkeys = &new_fltr->fkeys; + + rc = -EOPNOTSUPP; + switch (flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: { + struct ethtool_tcpip4_spec *ip_spec = &fs->h_u.tcp_ip4_spec; + struct ethtool_tcpip4_spec *ip_mask = &fs->m_u.tcp_ip4_spec; + + fkeys->basic.ip_proto = IPPROTO_TCP; + if (flow_type == UDP_V4_FLOW) + fkeys->basic.ip_proto = IPPROTO_UDP; + fkeys->basic.n_proto = htons(ETH_P_IP); + + if (ip_mask->ip4src == IPV4_ALL_MASK) { + fkeys->addrs.v4addrs.src = ip_spec->ip4src; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_IP; + } else if (ip_mask->ip4src) { + goto ntuple_err; + } + if (ip_mask->ip4dst == IPV4_ALL_MASK) { + fkeys->addrs.v4addrs.dst = ip_spec->ip4dst; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_IP; + } else if (ip_mask->ip4dst) { + goto ntuple_err; + } + + if (ip_mask->psrc == L4_PORT_ALL_MASK) { + fkeys->ports.src = ip_spec->psrc; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_PORT; + } else if (ip_mask->psrc) { + goto ntuple_err; + } + if (ip_mask->pdst == L4_PORT_ALL_MASK) { + fkeys->ports.dst = ip_spec->pdst; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_PORT; + } else if (ip_mask->pdst) { + goto ntuple_err; + } + break; + } + case TCP_V6_FLOW: + case UDP_V6_FLOW: { + struct ethtool_tcpip6_spec *ip_spec = &fs->h_u.tcp_ip6_spec; + struct ethtool_tcpip6_spec *ip_mask = &fs->m_u.tcp_ip6_spec; + + fkeys->basic.ip_proto = IPPROTO_TCP; + if (flow_type == UDP_V6_FLOW) + fkeys->basic.ip_proto = IPPROTO_UDP; + fkeys->basic.n_proto = htons(ETH_P_IPV6); + + if (ipv6_mask_is_full(ip_mask->ip6src)) { + fkeys->addrs.v6addrs.src = + *(struct in6_addr *)&ip_spec->ip6src; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_IP; + } else if (!ipv6_mask_is_zero(ip_mask->ip6src)) { + goto ntuple_err; + } + if (ipv6_mask_is_full(ip_mask->ip6dst)) { + fkeys->addrs.v6addrs.dst = + *(struct in6_addr *)&ip_spec->ip6dst; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_IP; + } else if (!ipv6_mask_is_zero(ip_mask->ip6dst)) { + goto ntuple_err; + } + + if (ip_mask->psrc == L4_PORT_ALL_MASK) { + fkeys->ports.src = ip_spec->psrc; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_SRC_PORT; + } else if (ip_mask->psrc) { + goto ntuple_err; + } + if (ip_mask->pdst == L4_PORT_ALL_MASK) { + fkeys->ports.dst = ip_spec->pdst; + new_fltr->ntuple_flags |= BNXT_NTUPLE_MATCH_DST_PORT; + } else if (ip_mask->pdst) { + goto ntuple_err; + } + break; + } + default: + rc = -EOPNOTSUPP; + goto ntuple_err; + } + if (!new_fltr->ntuple_flags) + goto ntuple_err; + + idx = bnxt_get_ntp_filter_idx(bp, fkeys, NULL); + rcu_read_lock(); + fltr = bnxt_lookup_ntp_filter_from_idx(bp, new_fltr, idx); + if (fltr) { + rcu_read_unlock(); + rc = -EEXIST; + goto ntuple_err; + } + rcu_read_unlock(); + + new_fltr->base.rxq = ring; + new_fltr->base.flags = BNXT_ACT_NO_AGING; + __set_bit(BNXT_FLTR_VALID, &new_fltr->base.state); + rc = bnxt_insert_ntp_filter(bp, new_fltr, idx); + if (!rc) { + rc = bnxt_hwrm_cfa_ntuple_filter_alloc(bp, new_fltr); + if (rc) { + bnxt_del_ntp_filter(bp, new_fltr); + return rc; + } + fs->location = new_fltr->base.sw_id; + return 0; + } + +ntuple_err: + atomic_dec(&l2_fltr->refcnt); + kfree(new_fltr); + return rc; +} + +static int bnxt_srxclsrlins(struct bnxt *bp, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fs = &cmd->fs; + u32 ring, flow_type; + int rc; + u8 vf; + + if (!netif_running(bp->dev)) + return -EAGAIN; + if (!(bp->flags & BNXT_FLAG_RFS)) + return -EPERM; + if (fs->location != RX_CLS_LOC_ANY) + return -EINVAL; + + ring = ethtool_get_flow_spec_ring(fs->ring_cookie); + vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + if (BNXT_VF(bp) && vf) + return -EINVAL; + if (BNXT_PF(bp) && vf > bp->pf.active_vfs) + return -EINVAL; + if (!vf && ring >= bp->rx_nr_rings) + return -EINVAL; + + flow_type = fs->flow_type; + if (flow_type & (FLOW_MAC_EXT | FLOW_RSS)) + return -EINVAL; + flow_type &= ~FLOW_EXT; + if (flow_type == ETHER_FLOW) + rc = -EOPNOTSUPP; + else + rc = bnxt_add_ntuple_cls_rule(bp, fs); + return rc; +} + +static int bnxt_srxclsrldel(struct bnxt *bp, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fs = &cmd->fs; + struct bnxt_filter_base *fltr_base; + + rcu_read_lock(); + fltr_base = bnxt_get_one_fltr_rcu(bp, bp->ntp_fltr_hash_tbl, + BNXT_NTP_FLTR_HASH_SIZE, + fs->location); + if (fltr_base) { + struct bnxt_ntuple_filter *fltr; + + fltr = container_of(fltr_base, struct bnxt_ntuple_filter, base); + rcu_read_unlock(); + if (!(fltr->base.flags & BNXT_ACT_NO_AGING)) + return -EINVAL; + bnxt_hwrm_cfa_ntuple_filter_free(bp, fltr); + bnxt_del_ntp_filter(bp, fltr); + return 0; + } + + rcu_read_unlock(); + return -ENOENT; +} static u64 get_ethtool_ipv4_rss(struct bnxt *bp) { @@ -1265,14 +1514,13 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, int rc = 0; switch (cmd->cmd) { -#ifdef CONFIG_RFS_ACCEL case ETHTOOL_GRXRINGS: cmd->data = bp->rx_nr_rings; break; case ETHTOOL_GRXCLSRLCNT: cmd->rule_cnt = bp->ntp_fltr_count; - cmd->data = BNXT_NTP_FLTR_MAX_FLTR; + cmd->data = BNXT_NTP_FLTR_MAX_FLTR | RX_CLS_LOC_SPECIAL; break; case ETHTOOL_GRXCLSRLALL: @@ -1282,7 +1530,6 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, case ETHTOOL_GRXCLSRULE: rc = bnxt_grxclsrule(bp, cmd); break; -#endif case ETHTOOL_GRXFH: rc = bnxt_grxfh(bp, cmd); @@ -1306,6 +1553,14 @@ static int bnxt_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) rc = bnxt_srxfh(bp, cmd); break; + case ETHTOOL_SRXCLSRLINS: + rc = bnxt_srxclsrlins(bp, cmd); + break; + + case ETHTOOL_SRXCLSRLDEL: + rc = bnxt_srxclsrldel(bp, cmd); + break; + default: rc = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 037624f17aea..c2b25fc623ec 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -59,7 +59,6 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, for (i = 0; i < num_frags ; i++) { skb_frag_t *frag = &sinfo->frags[i]; struct bnxt_sw_tx_bd *frag_tx_buf; - struct pci_dev *pdev = bp->pdev; dma_addr_t frag_mapping; int frag_len; @@ -73,16 +72,10 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; frag_len = skb_frag_size(frag); - frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0, - frag_len, DMA_TO_DEVICE); - - if (unlikely(dma_mapping_error(&pdev->dev, frag_mapping))) - return NULL; - - dma_unmap_addr_set(frag_tx_buf, mapping, frag_mapping); - flags = frag_len << TX_BD_LEN_SHIFT; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + frag_mapping = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); txbd->tx_bd_haddr = cpu_to_le64(frag_mapping); len = frag_len; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index e01a246124ac..f3543a2df68d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -289,7 +289,7 @@ static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv, int err; if (port_priv->vlans[vid]) { - netdev_warn(netdev, "VLAN %d already configured\n", vid); + netdev_err(netdev, "VLAN %d already configured\n", vid); return -EEXIST; } @@ -1509,9 +1509,9 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) struct device *dev = (struct device *)arg; struct ethsw_core *ethsw = dev_get_drvdata(dev); struct ethsw_port_priv *port_priv; - u32 status = ~0; int err, if_id; bool had_mac; + u32 status; err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, &status); @@ -1523,12 +1523,11 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) if_id = (status & 0xFFFF0000) >> 16; port_priv = ethsw->ports[if_id]; - if (status & DPSW_IRQ_EVENT_LINK_CHANGED) { + if (status & DPSW_IRQ_EVENT_LINK_CHANGED) dpaa2_switch_port_link_state_update(port_priv->netdev); - dpaa2_switch_port_set_mac_addr(port_priv); - } if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) { + dpaa2_switch_port_set_mac_addr(port_priv); /* We can avoid locking because the "endpoint changed" IRQ * handler is the only one who changes priv->mac at runtime, * so we are not racing with anyone. @@ -1540,20 +1539,20 @@ static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) dpaa2_switch_port_connect_mac(port_priv); } -out: err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, status); if (err) dev_err(dev, "Can't clear irq status (err %d)\n", err); +out: return IRQ_HANDLED; } static int dpaa2_switch_setup_irqs(struct fsl_mc_device *sw_dev) { + u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED | DPSW_IRQ_EVENT_ENDPOINT_CHANGED; struct device *dev = &sw_dev->dev; struct ethsw_core *ethsw = dev_get_drvdata(dev); - u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED; struct fsl_mc_device_irq *irq; int err; @@ -1775,8 +1774,10 @@ int dpaa2_switch_port_vlans_add(struct net_device *netdev, /* Make sure that the VLAN is not already configured * on the switch port */ - if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER) + if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER) { + netdev_err(netdev, "VLAN %d already configured\n", vlan->vid); return -EEXIST; + } /* Check if there is space for a new VLAN */ err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle, @@ -2003,25 +2004,11 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, struct netlink_ext_ack *extack) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct dpaa2_switch_fdb *old_fdb = port_priv->fdb; struct ethsw_core *ethsw = port_priv->ethsw_data; - struct ethsw_port_priv *other_port_priv; - struct net_device *other_dev; - struct list_head *iter; bool learn_ena; int err; - netdev_for_each_lower_dev(upper_dev, other_dev, iter) { - if (!dpaa2_switch_port_dev_check(other_dev)) - continue; - - other_port_priv = netdev_priv(other_dev); - if (other_port_priv->ethsw_data != port_priv->ethsw_data) { - NL_SET_ERR_MSG_MOD(extack, - "Interface from a different DPSW is in the bridge already"); - return -EINVAL; - } - } - /* Delete the previously manually installed VLAN 1 */ err = dpaa2_switch_port_del_vlan(port_priv, 1); if (err) @@ -2039,6 +2026,11 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, if (err) goto err_egress_flood; + /* Recreate the egress flood domain of the FDB that we just left. */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id); + if (err) + goto err_egress_flood; + err = switchdev_bridge_port_offload(netdev, netdev, NULL, NULL, NULL, false, extack); if (err) @@ -2155,6 +2147,10 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_port_priv *other_port_priv; + struct net_device *other_dev; + struct list_head *iter; int err; if (!br_vlan_enabled(upper_dev)) { @@ -2169,54 +2165,93 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, return 0; } + netdev_for_each_lower_dev(upper_dev, other_dev, iter) { + if (!dpaa2_switch_port_dev_check(other_dev)) + continue; + + other_port_priv = netdev_priv(other_dev); + if (other_port_priv->ethsw_data != port_priv->ethsw_data) { + NL_SET_ERR_MSG_MOD(extack, + "Interface from a different DPSW is in the bridge already"); + return -EINVAL; + } + } + return 0; } -static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) +static int dpaa2_switch_port_prechangeupper(struct net_device *netdev, + struct netdev_notifier_changeupper_info *info) { - struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; - int err = 0; + int err; if (!dpaa2_switch_port_dev_check(netdev)) - return NOTIFY_DONE; + return 0; extack = netdev_notifier_info_to_extack(&info->info); - - switch (event) { - case NETDEV_PRECHANGEUPPER: - upper_dev = info->upper_dev; - if (!netif_is_bridge_master(upper_dev)) - break; - + upper_dev = info->upper_dev; + if (netif_is_bridge_master(upper_dev)) { err = dpaa2_switch_prechangeupper_sanity_checks(netdev, upper_dev, extack); if (err) - goto out; + return err; if (!info->linking) dpaa2_switch_port_pre_bridge_leave(netdev); + } + + return 0; +} + +static int dpaa2_switch_port_changeupper(struct net_device *netdev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + if (!dpaa2_switch_port_dev_check(netdev)) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + upper_dev = info->upper_dev; + if (netif_is_bridge_master(upper_dev)) { + if (info->linking) + return dpaa2_switch_port_bridge_join(netdev, + upper_dev, + extack); + else + return dpaa2_switch_port_bridge_leave(netdev); + } + + return 0; +} + +static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + int err = 0; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + err = dpaa2_switch_port_prechangeupper(netdev, ptr); + if (err) + return notifier_from_errno(err); break; case NETDEV_CHANGEUPPER: - upper_dev = info->upper_dev; - if (netif_is_bridge_master(upper_dev)) { - if (info->linking) - err = dpaa2_switch_port_bridge_join(netdev, - upper_dev, - extack); - else - err = dpaa2_switch_port_bridge_leave(netdev); - } + err = dpaa2_switch_port_changeupper(netdev, ptr); + if (err) + return notifier_from_errno(err); + break; } -out: - return notifier_from_errno(err); + return NOTIFY_DONE; } struct ethsw_switchdev_event_work { @@ -3294,6 +3329,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER | NETIF_F_HW_TC; + port_netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; err = dpaa2_switch_port_init(port_priv, port_idx); if (err) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index c153dc083aff..11b14555802c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -920,6 +920,7 @@ static void enetc_imdio_remove(struct enetc_pf *pf) static bool enetc_port_has_pcs(struct enetc_pf *pf) { return (pf->if_mode == PHY_INTERFACE_MODE_SGMII || + pf->if_mode == PHY_INTERFACE_MODE_1000BASEX || pf->if_mode == PHY_INTERFACE_MODE_2500BASEX || pf->if_mode == PHY_INTERFACE_MODE_USXGMII); } @@ -1116,6 +1117,8 @@ static int enetc_phylink_create(struct enetc_ndev_priv *priv, pf->phylink_config.supported_interfaces); __set_bit(PHY_INTERFACE_MODE_SGMII, pf->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + pf->phylink_config.supported_interfaces); __set_bit(PHY_INTERFACE_MODE_2500BASEX, pf->phylink_config.supported_interfaces); __set_bit(PHY_INTERFACE_MODE_USXGMII, diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 4542e2bc28e8..f9328f2e669f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -5,6 +5,7 @@ * Shared functions for accessing and configuring the MAC */ +#include <linux/bitfield.h> #include "e1000.h" static s32 e1000_check_downshift(struct e1000_hw *hw); @@ -3260,8 +3261,7 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, return ret_val; phy_info->mdix_mode = - (e1000_auto_x_mode) ((phy_data & IGP01E1000_PSSR_MDIX) >> - IGP01E1000_PSSR_MDIX_SHIFT); + (e1000_auto_x_mode)FIELD_GET(IGP01E1000_PSSR_MDIX, phy_data); if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == IGP01E1000_PSSR_SPEED_1000MBPS) { @@ -3272,11 +3272,11 @@ static s32 e1000_phy_igp_get_info(struct e1000_hw *hw, if (ret_val) return ret_val; - phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >> - SR_1000T_LOCAL_RX_STATUS_SHIFT) ? + phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, + phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; - phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> - SR_1000T_REMOTE_RX_STATUS_SHIFT) ? + phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, + phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; /* Get cable length */ @@ -3326,14 +3326,12 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, return ret_val; phy_info->extended_10bt_distance = - ((phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >> - M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT) ? + FIELD_GET(M88E1000_PSCR_10BT_EXT_DIST_ENABLE, phy_data) ? e1000_10bt_ext_dist_enable_lower : e1000_10bt_ext_dist_enable_normal; phy_info->polarity_correction = - ((phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >> - M88E1000_PSCR_POLARITY_REVERSAL_SHIFT) ? + FIELD_GET(M88E1000_PSCR_POLARITY_REVERSAL, phy_data) ? e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled; /* Check polarity status */ @@ -3347,27 +3345,25 @@ static s32 e1000_phy_m88_get_info(struct e1000_hw *hw, return ret_val; phy_info->mdix_mode = - (e1000_auto_x_mode) ((phy_data & M88E1000_PSSR_MDIX) >> - M88E1000_PSSR_MDIX_SHIFT); + (e1000_auto_x_mode)FIELD_GET(M88E1000_PSSR_MDIX, phy_data); if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { /* Cable Length Estimation and Local/Remote Receiver Information * are only valid at 1000 Mbps. */ phy_info->cable_length = - (e1000_cable_length) ((phy_data & - M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT); + (e1000_cable_length)FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, + phy_data); ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) return ret_val; - phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >> - SR_1000T_LOCAL_RX_STATUS_SHIFT) ? + phy_info->local_rx = FIELD_GET(SR_1000T_LOCAL_RX_STATUS, + phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; - phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >> - SR_1000T_REMOTE_RX_STATUS_SHIFT) ? + phy_info->remote_rx = FIELD_GET(SR_1000T_REMOTE_RX_STATUS, + phy_data) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; } @@ -3515,7 +3511,7 @@ s32 e1000_init_eeprom_params(struct e1000_hw *hw) if (ret_val) return ret_val; eeprom_size = - (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT; + FIELD_GET(EEPROM_SIZE_MASK, eeprom_size); /* 256B eeprom size was not supported in earlier hardware, so we * bump eeprom_size up one to ensure that "1" (which maps to * 256B) is never the result used in the shifting logic below. @@ -4891,8 +4887,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length, &phy_data); if (ret_val) return ret_val; - cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT; + cable_length = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); /* Convert the enum value to ranged values */ switch (cable_length) { @@ -5001,8 +4996,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw, &phy_data); if (ret_val) return ret_val; - *polarity = ((phy_data & M88E1000_PSSR_REV_POLARITY) >> - M88E1000_PSSR_REV_POLARITY_SHIFT) ? + *polarity = FIELD_GET(M88E1000_PSSR_REV_POLARITY, phy_data) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal; } else if (hw->phy_type == e1000_phy_igp) { @@ -5072,8 +5066,8 @@ static s32 e1000_check_downshift(struct e1000_hw *hw) if (ret_val) return ret_val; - hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >> - M88E1000_PSSR_DOWNSHIFT_SHIFT; + hw->speed_downgraded = FIELD_GET(M88E1000_PSSR_DOWNSHIFT, + phy_data); } return E1000_SUCCESS; diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index be9c695dde12..4eb1ceaf865a 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -92,8 +92,7 @@ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) nvm->type = e1000_nvm_eeprom_spi; - size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> - E1000_EECD_SIZE_EX_SHIFT); + size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. @@ -1035,17 +1034,18 @@ static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) * iteration and increase the max iterations when * polling the phy; this fixes erroneous timeouts at 10Mbps. */ - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4), - 0xFFFF); + /* these next three accesses were always meant to use page 0x34 using + * GG82563_REG(0x34, N) but never did, so we've just corrected the call + * to not drop bits + */ + ret_val = e1000_write_kmrn_reg_80003es2lan(hw, 4, 0xFFFF); if (ret_val) return ret_val; - ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), - ®_data); + ret_val = e1000_read_kmrn_reg_80003es2lan(hw, 9, ®_data); if (ret_val) return ret_val; reg_data |= 0x3F; - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), - reg_data); + ret_val = e1000_write_kmrn_reg_80003es2lan(hw, 9, reg_data); if (ret_val) return ret_val; ret_val = @@ -1209,8 +1209,8 @@ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, if (ret_val) return ret_val; - kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; + kmrnctrlsta = FIELD_PREP(E1000_KMRNCTRLSTA_OFFSET, offset) | + E1000_KMRNCTRLSTA_REN; ew32(KMRNCTRLSTA, kmrnctrlsta); e1e_flush(); @@ -1244,8 +1244,7 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, if (ret_val) return ret_val; - kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | data; + kmrnctrlsta = FIELD_PREP(E1000_KMRNCTRLSTA_OFFSET, offset) | data; ew32(KMRNCTRLSTA, kmrnctrlsta); e1e_flush(); diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 0b1e890dd583..969f855a79ee 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -157,8 +157,7 @@ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) fallthrough; default: nvm->type = e1000_nvm_eeprom_spi; - size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> - E1000_EECD_SIZE_EX_SHIFT); + size = (u16)FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. */ diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 9835e6a90d56..fc0f98ea6133 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -654,8 +654,8 @@ static void e1000_get_drvinfo(struct net_device *netdev, */ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d-%d", - (adapter->eeprom_vers & 0xF000) >> 12, - (adapter->eeprom_vers & 0x0FF0) >> 4, + FIELD_GET(0xF000, adapter->eeprom_vers), + FIELD_GET(0x0FF0, adapter->eeprom_vers), (adapter->eeprom_vers & 0x000F)); strscpy(drvinfo->bus_info, pci_name(adapter->pdev), @@ -925,8 +925,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) } if (mac->type >= e1000_pch_lpt) - wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >> - E1000_FWSM_WLOCK_MAC_SHIFT; + wlock_mac = FIELD_GET(E1000_FWSM_WLOCK_MAC_MASK, er32(FWSM)); for (i = 0; i < mac->rar_entry_count; i++) { if (mac->type >= e1000_pch_lpt) { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 39e9fc601bf5..a2788fd5f8bb 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1072,13 +1072,11 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) * (1U << (E1000_LTRV_SCALE_FACTOR * - ((lat_enc & E1000_LTRV_SCALE_MASK) - >> E1000_LTRV_SCALE_SHIFT))); + FIELD_GET(E1000_LTRV_SCALE_MASK, lat_enc))); max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) * - (1U << (E1000_LTRV_SCALE_FACTOR * - ((max_ltr_enc & E1000_LTRV_SCALE_MASK) - >> E1000_LTRV_SCALE_SHIFT))); + (1U << (E1000_LTRV_SCALE_FACTOR * + FIELD_GET(E1000_LTRV_SCALE_MASK, max_ltr_enc))); if (lat_enc_d > max_ltr_enc_d) lat_enc = max_ltr_enc; @@ -2075,8 +2073,7 @@ static s32 e1000_write_smbus_addr(struct e1000_hw *hw) { u16 phy_data; u32 strap = er32(STRAP); - u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> - E1000_STRAP_SMT_FREQ_SHIFT; + u32 freq = FIELD_GET(E1000_STRAP_SMT_FREQ_MASK, strap); s32 ret_val; strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; @@ -2562,8 +2559,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), - (u16)((mac_reg & E1000_RAH_AV) - >> 16)); + FIELD_GET(E1000_RAH_AV, mac_reg)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); @@ -3205,7 +3201,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) &nvm_dword); if (ret_val) return ret_val; - sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + sig_byte = FIELD_GET(0xFF00, nvm_dword); if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 0; @@ -3218,7 +3214,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) &nvm_dword); if (ret_val) return ret_val; - sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + sig_byte = FIELD_GET(0xFF00, nvm_dword); if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 1; diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 8c3d9c5962f2..d7df2a0ed629 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -50,7 +50,7 @@ void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) * for the device regardless of function swap state. */ reg = er32(STATUS); - bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; + bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg); } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index f536c856727c..af5d9d97a0d6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1788,8 +1788,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) adapter->corr_errors += pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; adapter->uncorr_errors += - (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> - E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; + FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); /* Do the reset outside of interrupt context */ schedule_work(&adapter->reset_task); @@ -1868,8 +1867,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) adapter->corr_errors += pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; adapter->uncorr_errors += - (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> - E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; + FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); /* Do the reset outside of interrupt context */ schedule_work(&adapter->reset_task); @@ -5031,8 +5029,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) adapter->corr_errors += pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK; adapter->uncorr_errors += - (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >> - E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT; + FIELD_GET(E1000_PBECCSTS_UNCORR_ERR_CNT_MASK, pbeccsts); } } @@ -6249,7 +6246,7 @@ static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) phy_reg |= BM_RCTL_MPE; phy_reg &= ~(BM_RCTL_MO_MASK); if (mac_reg & E1000_RCTL_MO_3) - phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) + phy_reg |= (FIELD_GET(E1000_RCTL_MO_3, mac_reg) << BM_RCTL_MO_SHIFT); if (mac_reg & E1000_RCTL_BAM) phy_reg |= BM_RCTL_BAM; diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 08c3d477dd6f..5e329156d1ba 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -154,10 +154,9 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) e_dbg("MDI Read PHY Reg Address %d Error\n", offset); return -E1000_ERR_PHY; } - if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { e_dbg("MDI Read offset error - requested %d, returned %d\n", - offset, - (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); return -E1000_ERR_PHY; } *data = (u16)mdic; @@ -167,7 +166,6 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) */ if (hw->mac.type == e1000_pch2lan) udelay(100); - return 0; } @@ -218,10 +216,9 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) e_dbg("MDI Write PHY Red Address %d Error\n", offset); return -E1000_ERR_PHY; } - if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) { e_dbg("MDI Write offset error - requested %d, returned %d\n", - offset, - (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic)); return -E1000_ERR_PHY; } @@ -463,8 +460,8 @@ static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, return ret_val; } - kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; + kmrnctrlsta = FIELD_PREP(E1000_KMRNCTRLSTA_OFFSET, offset) | + E1000_KMRNCTRLSTA_REN; ew32(KMRNCTRLSTA, kmrnctrlsta); e1e_flush(); @@ -536,8 +533,7 @@ static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, return ret_val; } - kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & - E1000_KMRNCTRLSTA_OFFSET) | data; + kmrnctrlsta = FIELD_PREP(E1000_KMRNCTRLSTA_OFFSET, offset) | data; ew32(KMRNCTRLSTA, kmrnctrlsta); e1e_flush(); @@ -1793,8 +1789,7 @@ s32 e1000e_get_cable_length_m88(struct e1000_hw *hw) if (ret_val) return ret_val; - index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT); + index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) return -E1000_ERR_PHY; @@ -3234,8 +3229,7 @@ s32 e1000_get_cable_length_82577(struct e1000_hw *hw) if (ret_val) return ret_val; - length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> - I82577_DSTATUS_CABLE_LENGTH_SHIFT); + length = FIELD_GET(I82577_DSTATUS_CABLE_LENGTH, phy_data); if (length == E1000_CABLE_LENGTH_UNDEFINED) return -E1000_ERR_PHY; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index af1b0cde3670..98861cc6df7c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2019 Intel Corporation. */ +#include <linux/bitfield.h> #include "fm10k_pf.h" #include "fm10k_vf.h" @@ -865,8 +866,7 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, * register is RO from the VF, so the PF must do this even in the * case of notifying the VF of a new VID via the mailbox. */ - txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & - FM10K_TXQCTL_VID_MASK; + txqctl = FIELD_PREP(FM10K_TXQCTL_VID_MASK, vf_vid); txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) | FM10K_TXQCTL_VF | vf_idx; @@ -1575,8 +1575,7 @@ static s32 fm10k_get_fault_pf(struct fm10k_hw *hw, int type, if (func & FM10K_FAULT_FUNC_PF) fault->func = 0; else - fault->func = 1 + ((func & FM10K_FAULT_FUNC_VF_MASK) >> - FM10K_FAULT_FUNC_VF_SHIFT); + fault->func = 1 + FIELD_GET(FM10K_FAULT_FUNC_VF_MASK, func); /* record fault type */ fault->type = func & FM10K_FAULT_FUNC_TYPE_MASK; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index dc8ccd378ec9..7fb1961f2921 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2019 Intel Corporation. */ +#include <linux/bitfield.h> #include "fm10k_vf.h" /** @@ -126,15 +127,14 @@ static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) hw->mac.max_queues = i; /* fetch default VLAN and ITR scale */ - hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) & - FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT; + hw->mac.default_vid = FIELD_GET(FM10K_TXQCTL_VID_MASK, + fm10k_read_reg(hw, FM10K_TXQCTL(0))); /* Read the ITR scale from TDLEN. See the definition of * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is * used here. */ - hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) & - FM10K_TDLEN_ITR_SCALE_MASK) >> - FM10K_TDLEN_ITR_SCALE_SHIFT; + hw->mac.itr_scale = FIELD_GET(FM10K_TDLEN_ITR_SCALE_MASK, + fm10k_read_reg(hw, FM10K_TDLEN(0))); return 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index bd52b73cf61f..de6ca6295742 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2021 Intel Corporation. */ #include <linux/avf/virtchnl.h> +#include <linux/bitfield.h> #include <linux/delay.h> #include <linux/etherdevice.h> #include <linux/pci.h> @@ -248,6 +249,7 @@ static int i40e_aq_get_set_rss_lut(struct i40e_hw *hw, struct i40e_aqc_get_set_rss_lut *cmd_resp = (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw; int status; + u16 flags; if (set) i40e_fill_default_direct_cmd_desc(&desc, @@ -260,23 +262,18 @@ static int i40e_aq_get_set_rss_lut(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD); - cmd_resp->vsi_id = - cpu_to_le16((u16)((vsi_id << - I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) & - I40E_AQC_SET_RSS_LUT_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_LUT_VSI_VALID); + vsi_id = FIELD_PREP(I40E_AQC_SET_RSS_LUT_VSI_ID_MASK, vsi_id) | + FIELD_PREP(I40E_AQC_SET_RSS_LUT_VSI_VALID, 1); + cmd_resp->vsi_id = cpu_to_le16(vsi_id); if (pf_lut) - cmd_resp->flags |= cpu_to_le16((u16) - ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF << - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + flags = FIELD_PREP(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK, + I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF); else - cmd_resp->flags |= cpu_to_le16((u16) - ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI << - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + flags = FIELD_PREP(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK, + I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI); + cmd_resp->flags = cpu_to_le16(flags); status = i40e_asq_send_command(hw, &desc, lut, lut_size, NULL); return status; @@ -346,11 +343,9 @@ static int i40e_aq_get_set_rss_key(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD); - cmd_resp->vsi_id = - cpu_to_le16((u16)((vsi_id << - I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) & - I40E_AQC_SET_RSS_KEY_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_KEY_VSI_VALID); + vsi_id = FIELD_PREP(I40E_AQC_SET_RSS_KEY_VSI_ID_MASK, vsi_id) | + FIELD_PREP(I40E_AQC_SET_RSS_KEY_VSI_VALID, 1); + cmd_resp->vsi_id = cpu_to_le16(vsi_id); status = i40e_asq_send_command(hw, &desc, key, key_size, NULL); @@ -669,11 +664,11 @@ int i40e_init_shared_code(struct i40e_hw *hw) hw->phy.get_link_info = true; /* Determine port number and PF number*/ - port = (rd32(hw, I40E_PFGEN_PORTNUM) & I40E_PFGEN_PORTNUM_PORT_NUM_MASK) - >> I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT; + port = FIELD_GET(I40E_PFGEN_PORTNUM_PORT_NUM_MASK, + rd32(hw, I40E_PFGEN_PORTNUM)); hw->port = (u8)port; - ari = (rd32(hw, I40E_GLPCI_CAPSUP) & I40E_GLPCI_CAPSUP_ARI_EN_MASK) >> - I40E_GLPCI_CAPSUP_ARI_EN_SHIFT; + ari = FIELD_GET(I40E_GLPCI_CAPSUP_ARI_EN_MASK, + rd32(hw, I40E_GLPCI_CAPSUP)); func_rid = rd32(hw, I40E_PF_FUNC_RID); if (ari) hw->pf_id = (u8)(func_rid & 0xff); @@ -991,9 +986,8 @@ int i40e_pf_reset(struct i40e_hw *hw) * The grst delay value is in 100ms units, and we'll wait a * couple counts longer to be sure we don't just miss the end. */ - grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) & - I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >> - I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; + grst_del = FIELD_GET(I40E_GLGEN_RSTCTL_GRSTDEL_MASK, + rd32(hw, I40E_GLGEN_RSTCTL)); /* It can take upto 15 secs for GRST steady state. * Bump it to 16 secs max to be safe. @@ -1085,26 +1079,20 @@ void i40e_clear_hw(struct i40e_hw *hw) /* get number of interrupts, queues, and VFs */ val = rd32(hw, I40E_GLPCI_CNF2); - num_pf_int = (val & I40E_GLPCI_CNF2_MSI_X_PF_N_MASK) >> - I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT; - num_vf_int = (val & I40E_GLPCI_CNF2_MSI_X_VF_N_MASK) >> - I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT; + num_pf_int = FIELD_GET(I40E_GLPCI_CNF2_MSI_X_PF_N_MASK, val); + num_vf_int = FIELD_GET(I40E_GLPCI_CNF2_MSI_X_VF_N_MASK, val); val = rd32(hw, I40E_PFLAN_QALLOC); - base_queue = (val & I40E_PFLAN_QALLOC_FIRSTQ_MASK) >> - I40E_PFLAN_QALLOC_FIRSTQ_SHIFT; - j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >> - I40E_PFLAN_QALLOC_LASTQ_SHIFT; + base_queue = FIELD_GET(I40E_PFLAN_QALLOC_FIRSTQ_MASK, val); + j = FIELD_GET(I40E_PFLAN_QALLOC_LASTQ_MASK, val); if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue) num_queues = (j - base_queue) + 1; else num_queues = 0; val = rd32(hw, I40E_PF_VT_PFALLOC); - i = (val & I40E_PF_VT_PFALLOC_FIRSTVF_MASK) >> - I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT; - j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >> - I40E_PF_VT_PFALLOC_LASTVF_SHIFT; + i = FIELD_GET(I40E_PF_VT_PFALLOC_FIRSTVF_MASK, val); + j = FIELD_GET(I40E_PF_VT_PFALLOC_LASTVF_MASK, val); if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i) num_vfs = (j - i) + 1; else @@ -1199,8 +1187,7 @@ static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx) !hw->func_caps.led[idx]) return 0; gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(idx)); - port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK) >> - I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT; + port = FIELD_GET(I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK, gpio_val); /* if PRT_NUM_NA is 1 then this LED is not port specific, OR * if it is not our port then ignore @@ -1244,8 +1231,7 @@ u32 i40e_led_get(struct i40e_hw *hw) if (!gpio_val) continue; - mode = (gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK) >> - I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT; + mode = FIELD_GET(I40E_GLGEN_GPIO_CTL_LED_MODE_MASK, gpio_val); break; } @@ -1288,14 +1274,14 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink) pin_func = I40E_PIN_FUNC_LED; gpio_val &= ~I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK; - gpio_val |= ((pin_func << - I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) & - I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK); + gpio_val |= + FIELD_PREP(I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK, + pin_func); } gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK; /* this & is a bit of paranoia, but serves as a range check */ - gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) & - I40E_GLGEN_GPIO_CTL_LED_MODE_MASK); + gpio_val |= FIELD_PREP(I40E_GLGEN_GPIO_CTL_LED_MODE_MASK, + mode); if (blink) gpio_val |= BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT); @@ -3514,8 +3500,7 @@ int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type, desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); cmd->type = mib_type & I40E_AQ_LLDP_MIB_TYPE_MASK; - cmd->type |= ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) & - I40E_AQ_LLDP_BRIDGE_TYPE_MASK); + cmd->type |= FIELD_PREP(I40E_AQ_LLDP_BRIDGE_TYPE_MASK, bridge_type); desc.datalen = cpu_to_le16(buff_size); @@ -4196,8 +4181,7 @@ i40e_validate_filter_settings(struct i40e_hw *hw, /* FCHSIZE + FCDSIZE should not be greater than PMFCOEFMAX */ val = rd32(hw, I40E_GLHMC_FCOEFMAX); - fcoe_fmax = (val & I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK) - >> I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT; + fcoe_fmax = FIELD_GET(I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK, val); if (fcoe_filt_size + fcoe_cntx_size > fcoe_fmax) return -EINVAL; @@ -4233,30 +4217,25 @@ int i40e_set_filter_control(struct i40e_hw *hw, /* Program required PE hash buckets for the PF */ val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK; - val |= ((u32)settings->pe_filt_num << I40E_PFQF_CTL_0_PEHSIZE_SHIFT) & - I40E_PFQF_CTL_0_PEHSIZE_MASK; + val |= FIELD_PREP(I40E_PFQF_CTL_0_PEHSIZE_MASK, settings->pe_filt_num); /* Program required PE contexts for the PF */ val &= ~I40E_PFQF_CTL_0_PEDSIZE_MASK; - val |= ((u32)settings->pe_cntx_num << I40E_PFQF_CTL_0_PEDSIZE_SHIFT) & - I40E_PFQF_CTL_0_PEDSIZE_MASK; + val |= FIELD_PREP(I40E_PFQF_CTL_0_PEDSIZE_MASK, settings->pe_cntx_num); /* Program required FCoE hash buckets for the PF */ val &= ~I40E_PFQF_CTL_0_PFFCHSIZE_MASK; - val |= ((u32)settings->fcoe_filt_num << - I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT) & - I40E_PFQF_CTL_0_PFFCHSIZE_MASK; + val |= FIELD_PREP(I40E_PFQF_CTL_0_PFFCHSIZE_MASK, + settings->fcoe_filt_num); /* Program required FCoE DDP contexts for the PF */ val &= ~I40E_PFQF_CTL_0_PFFCDSIZE_MASK; - val |= ((u32)settings->fcoe_cntx_num << - I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT) & - I40E_PFQF_CTL_0_PFFCDSIZE_MASK; + val |= FIELD_PREP(I40E_PFQF_CTL_0_PFFCDSIZE_MASK, + settings->fcoe_cntx_num); /* Program Hash LUT size for the PF */ val &= ~I40E_PFQF_CTL_0_HASHLUTSIZE_MASK; if (settings->hash_lut_size == I40E_HASH_LUT_SIZE_512) hash_lut_size = 1; - val |= (hash_lut_size << I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT) & - I40E_PFQF_CTL_0_HASHLUTSIZE_MASK; + val |= FIELD_PREP(I40E_PFQF_CTL_0_HASHLUTSIZE_MASK, hash_lut_size); /* Enable FDIR, Ethertype and MACVLAN filters for PF and VFs */ if (settings->enable_fdir) @@ -4657,8 +4636,7 @@ int i40e_read_phy_register_clause22(struct i40e_hw *hw, "PHY: Can't write command to external PHY.\n"); } else { command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); - *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> - I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + *value = FIELD_GET(I40E_GLGEN_MSRWD_MDIRDDATA_MASK, command); } return status; @@ -4767,8 +4745,7 @@ int i40e_read_phy_register_clause45(struct i40e_hw *hw, if (!status) { command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); - *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> - I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + *value = FIELD_GET(I40E_GLGEN_MSRWD_MDIRDDATA_MASK, command); } else { i40e_debug(hw, I40E_DEBUG_PHY, "PHY: Can't read register value from external PHY.\n"); @@ -5318,16 +5295,17 @@ static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio, u8 mdio_num, struct i40e_aqc_phy_register_access *cmd) { - if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) { - if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps)) - cmd->cmd_flags |= - I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER | - ((mdio_num << - I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT) & - I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK); - else - i40e_debug(hw, I40E_DEBUG_PHY, - "MDIO I/F number selection not supported by current FW version.\n"); + if (!set_mdio || + cmd->phy_interface != I40E_AQ_PHY_REG_ACCESS_EXTERNAL) + return; + + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps)) { + cmd->cmd_flags |= + I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER | + FIELD_PREP(I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK, + mdio_num); + } else { + i40e_debug(hw, I40E_DEBUG_PHY, "MDIO I/F number selection not supported by current FW version.\n"); } } @@ -5912,9 +5890,8 @@ i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, u16 tnl_type; u32 ti; - tnl_type = (le16_to_cpu(filters[i].element.flags) & - I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> - I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; + tnl_type = le16_get_bits(filters[i].element.flags, + I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK); /* Due to hardware eccentricities, the VNI for Geneve is shifted * one more byte further than normally used for Tenant ID in @@ -6006,9 +5983,8 @@ i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, u16 tnl_type; u32 ti; - tnl_type = (le16_to_cpu(filters[i].element.flags) & - I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> - I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; + tnl_type = le16_get_bits(filters[i].element.flags, + I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK); /* Due to hardware eccentricities, the VNI for Geneve is shifted * one more byte further than normally used for Tenant ID in diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 498728e16a37..9d88ed6105fd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2021 Intel Corporation. */ +#include <linux/bitfield.h> +#include "i40e_adminq.h" #include "i40e_alloc.h" #include "i40e_dcb.h" #include "i40e_prototype.h" @@ -20,8 +22,7 @@ int i40e_get_dcbx_status(struct i40e_hw *hw, u16 *status) return -EINVAL; reg = rd32(hw, I40E_PRTDCB_GENS); - *status = (u16)((reg & I40E_PRTDCB_GENS_DCBX_STATUS_MASK) >> - I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT); + *status = FIELD_GET(I40E_PRTDCB_GENS_DCBX_STATUS_MASK, reg); return 0; } @@ -50,12 +51,9 @@ static void i40e_parse_ieee_etscfg_tlv(struct i40e_lldp_org_tlv *tlv, * |1bit | 1bit|3 bits|3bits| */ etscfg = &dcbcfg->etscfg; - etscfg->willing = (u8)((buf[offset] & I40E_IEEE_ETS_WILLING_MASK) >> - I40E_IEEE_ETS_WILLING_SHIFT); - etscfg->cbs = (u8)((buf[offset] & I40E_IEEE_ETS_CBS_MASK) >> - I40E_IEEE_ETS_CBS_SHIFT); - etscfg->maxtcs = (u8)((buf[offset] & I40E_IEEE_ETS_MAXTC_MASK) >> - I40E_IEEE_ETS_MAXTC_SHIFT); + etscfg->willing = FIELD_GET(I40E_IEEE_ETS_WILLING_MASK, buf[offset]); + etscfg->cbs = FIELD_GET(I40E_IEEE_ETS_CBS_MASK, buf[offset]); + etscfg->maxtcs = FIELD_GET(I40E_IEEE_ETS_MAXTC_MASK, buf[offset]); /* Move offset to Priority Assignment Table */ offset++; @@ -69,11 +67,9 @@ static void i40e_parse_ieee_etscfg_tlv(struct i40e_lldp_org_tlv *tlv, * ----------------------------------------- */ for (i = 0; i < 4; i++) { - priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_1_MASK) >> - I40E_IEEE_ETS_PRIO_1_SHIFT); - etscfg->prioritytable[i * 2] = priority; - priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_0_MASK) >> - I40E_IEEE_ETS_PRIO_0_SHIFT); + priority = FIELD_GET(I40E_IEEE_ETS_PRIO_1_MASK, buf[offset]); + etscfg->prioritytable[i * 2] = priority; + priority = FIELD_GET(I40E_IEEE_ETS_PRIO_0_MASK, buf[offset]); etscfg->prioritytable[i * 2 + 1] = priority; offset++; } @@ -124,12 +120,10 @@ static void i40e_parse_ieee_etsrec_tlv(struct i40e_lldp_org_tlv *tlv, * ----------------------------------------- */ for (i = 0; i < 4; i++) { - priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_1_MASK) >> - I40E_IEEE_ETS_PRIO_1_SHIFT); - dcbcfg->etsrec.prioritytable[i*2] = priority; - priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_0_MASK) >> - I40E_IEEE_ETS_PRIO_0_SHIFT); - dcbcfg->etsrec.prioritytable[i*2 + 1] = priority; + priority = FIELD_GET(I40E_IEEE_ETS_PRIO_1_MASK, buf[offset]); + dcbcfg->etsrec.prioritytable[i * 2] = priority; + priority = FIELD_GET(I40E_IEEE_ETS_PRIO_0_MASK, buf[offset]); + dcbcfg->etsrec.prioritytable[(i * 2) + 1] = priority; offset++; } @@ -170,12 +164,9 @@ static void i40e_parse_ieee_pfccfg_tlv(struct i40e_lldp_org_tlv *tlv, * ----------------------------------------- * |1bit | 1bit|2 bits|4bits| 1 octet | */ - dcbcfg->pfc.willing = (u8)((buf[0] & I40E_IEEE_PFC_WILLING_MASK) >> - I40E_IEEE_PFC_WILLING_SHIFT); - dcbcfg->pfc.mbc = (u8)((buf[0] & I40E_IEEE_PFC_MBC_MASK) >> - I40E_IEEE_PFC_MBC_SHIFT); - dcbcfg->pfc.pfccap = (u8)((buf[0] & I40E_IEEE_PFC_CAP_MASK) >> - I40E_IEEE_PFC_CAP_SHIFT); + dcbcfg->pfc.willing = FIELD_GET(I40E_IEEE_PFC_WILLING_MASK, buf[0]); + dcbcfg->pfc.mbc = FIELD_GET(I40E_IEEE_PFC_MBC_MASK, buf[0]); + dcbcfg->pfc.pfccap = FIELD_GET(I40E_IEEE_PFC_CAP_MASK, buf[0]); dcbcfg->pfc.pfcenable = buf[1]; } @@ -196,8 +187,7 @@ static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv, u8 *buf; typelength = ntohs(tlv->typelength); - length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> - I40E_LLDP_TLV_LEN_SHIFT); + length = FIELD_GET(I40E_LLDP_TLV_LEN_MASK, typelength); buf = tlv->tlvinfo; /* The App priority table starts 5 octets after TLV header */ @@ -215,12 +205,10 @@ static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv, * ----------------------------------------- */ while (offset < length) { - dcbcfg->app[i].priority = (u8)((buf[offset] & - I40E_IEEE_APP_PRIO_MASK) >> - I40E_IEEE_APP_PRIO_SHIFT); - dcbcfg->app[i].selector = (u8)((buf[offset] & - I40E_IEEE_APP_SEL_MASK) >> - I40E_IEEE_APP_SEL_SHIFT); + dcbcfg->app[i].priority = FIELD_GET(I40E_IEEE_APP_PRIO_MASK, + buf[offset]); + dcbcfg->app[i].selector = FIELD_GET(I40E_IEEE_APP_SEL_MASK, + buf[offset]); dcbcfg->app[i].protocolid = (buf[offset + 1] << 0x8) | buf[offset + 2]; /* Move to next app */ @@ -248,8 +236,7 @@ static void i40e_parse_ieee_tlv(struct i40e_lldp_org_tlv *tlv, u8 subtype; ouisubtype = ntohl(tlv->ouisubtype); - subtype = (u8)((ouisubtype & I40E_LLDP_TLV_SUBTYPE_MASK) >> - I40E_LLDP_TLV_SUBTYPE_SHIFT); + subtype = FIELD_GET(I40E_LLDP_TLV_SUBTYPE_MASK, ouisubtype); switch (subtype) { case I40E_IEEE_SUBTYPE_ETS_CFG: i40e_parse_ieee_etscfg_tlv(tlv, dcbcfg); @@ -299,11 +286,9 @@ static void i40e_parse_cee_pgcfg_tlv(struct i40e_cee_feat_tlv *tlv, * ----------------------------------------- */ for (i = 0; i < 4; i++) { - priority = (u8)((buf[offset] & I40E_CEE_PGID_PRIO_1_MASK) >> - I40E_CEE_PGID_PRIO_1_SHIFT); - etscfg->prioritytable[i * 2] = priority; - priority = (u8)((buf[offset] & I40E_CEE_PGID_PRIO_0_MASK) >> - I40E_CEE_PGID_PRIO_0_SHIFT); + priority = FIELD_GET(I40E_CEE_PGID_PRIO_1_MASK, buf[offset]); + etscfg->prioritytable[i * 2] = priority; + priority = FIELD_GET(I40E_CEE_PGID_PRIO_0_MASK, buf[offset]); etscfg->prioritytable[i * 2 + 1] = priority; offset++; } @@ -360,8 +345,7 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, u8 i; typelength = ntohs(tlv->hdr.typelen); - length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> - I40E_LLDP_TLV_LEN_SHIFT); + length = FIELD_GET(I40E_LLDP_TLV_LEN_MASK, typelength); dcbcfg->numapps = length / sizeof(*app); @@ -417,15 +401,13 @@ static void i40e_parse_cee_tlv(struct i40e_lldp_org_tlv *tlv, u32 ouisubtype; ouisubtype = ntohl(tlv->ouisubtype); - subtype = (u8)((ouisubtype & I40E_LLDP_TLV_SUBTYPE_MASK) >> - I40E_LLDP_TLV_SUBTYPE_SHIFT); + subtype = FIELD_GET(I40E_LLDP_TLV_SUBTYPE_MASK, ouisubtype); /* Return if not CEE DCBX */ if (subtype != I40E_CEE_DCBX_TYPE) return; typelength = ntohs(tlv->typelength); - tlvlen = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> - I40E_LLDP_TLV_LEN_SHIFT); + tlvlen = FIELD_GET(I40E_LLDP_TLV_LEN_MASK, typelength); len = sizeof(tlv->typelength) + sizeof(ouisubtype) + sizeof(struct i40e_cee_ctrl_tlv); /* Return if no CEE DCBX Feature TLVs */ @@ -435,11 +417,8 @@ static void i40e_parse_cee_tlv(struct i40e_lldp_org_tlv *tlv, sub_tlv = (struct i40e_cee_feat_tlv *)((char *)tlv + len); while (feat_tlv_count < I40E_CEE_MAX_FEAT_TYPE) { typelength = ntohs(sub_tlv->hdr.typelen); - sublen = (u16)((typelength & - I40E_LLDP_TLV_LEN_MASK) >> - I40E_LLDP_TLV_LEN_SHIFT); - subtype = (u8)((typelength & I40E_LLDP_TLV_TYPE_MASK) >> - I40E_LLDP_TLV_TYPE_SHIFT); + sublen = FIELD_GET(I40E_LLDP_TLV_LEN_MASK, typelength); + subtype = FIELD_GET(I40E_LLDP_TLV_TYPE_MASK, typelength); switch (subtype) { case I40E_CEE_SUBTYPE_PG_CFG: i40e_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg); @@ -476,8 +455,7 @@ static void i40e_parse_org_tlv(struct i40e_lldp_org_tlv *tlv, u32 oui; ouisubtype = ntohl(tlv->ouisubtype); - oui = (u32)((ouisubtype & I40E_LLDP_TLV_OUI_MASK) >> - I40E_LLDP_TLV_OUI_SHIFT); + oui = FIELD_GET(I40E_LLDP_TLV_OUI_MASK, ouisubtype); switch (oui) { case I40E_IEEE_8021QAZ_OUI: i40e_parse_ieee_tlv(tlv, dcbcfg); @@ -515,10 +493,8 @@ int i40e_lldp_to_dcb_config(u8 *lldpmib, tlv = (struct i40e_lldp_org_tlv *)lldpmib; while (1) { typelength = ntohs(tlv->typelength); - type = (u16)((typelength & I40E_LLDP_TLV_TYPE_MASK) >> - I40E_LLDP_TLV_TYPE_SHIFT); - length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> - I40E_LLDP_TLV_LEN_SHIFT); + type = FIELD_GET(I40E_LLDP_TLV_TYPE_MASK, typelength); + length = FIELD_GET(I40E_LLDP_TLV_LEN_MASK, typelength); offset += sizeof(typelength) + length; /* END TLV or beyond LLDPDU size */ @@ -592,7 +568,7 @@ static void i40e_cee_to_dcb_v1_config( { u16 status, tlv_status = le16_to_cpu(cee_cfg->tlv_status); u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); - u8 i, tc, err; + u8 i, err; /* CEE PG data to ETS config */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc; @@ -601,13 +577,13 @@ static void i40e_cee_to_dcb_v1_config( * from those in the CEE Priority Group sub-TLV. */ for (i = 0; i < 4; i++) { - tc = (u8)((cee_cfg->oper_prio_tc[i] & - I40E_CEE_PGID_PRIO_0_MASK) >> - I40E_CEE_PGID_PRIO_0_SHIFT); - dcbcfg->etscfg.prioritytable[i * 2] = tc; - tc = (u8)((cee_cfg->oper_prio_tc[i] & - I40E_CEE_PGID_PRIO_1_MASK) >> - I40E_CEE_PGID_PRIO_1_SHIFT); + u8 tc; + + tc = FIELD_GET(I40E_CEE_PGID_PRIO_0_MASK, + cee_cfg->oper_prio_tc[i]); + dcbcfg->etscfg.prioritytable[i * 2] = tc; + tc = FIELD_GET(I40E_CEE_PGID_PRIO_1_MASK, + cee_cfg->oper_prio_tc[i]); dcbcfg->etscfg.prioritytable[i*2 + 1] = tc; } @@ -629,8 +605,7 @@ static void i40e_cee_to_dcb_v1_config( dcbcfg->pfc.pfcenable = cee_cfg->oper_pfc_en; dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; - status = (tlv_status & I40E_AQC_CEE_APP_STATUS_MASK) >> - I40E_AQC_CEE_APP_STATUS_SHIFT; + status = FIELD_GET(I40E_AQC_CEE_APP_STATUS_MASK, tlv_status); err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0; /* Add APPs if Error is False */ if (!err) { @@ -639,22 +614,19 @@ static void i40e_cee_to_dcb_v1_config( /* FCoE APP */ dcbcfg->app[0].priority = - (app_prio & I40E_AQC_CEE_APP_FCOE_MASK) >> - I40E_AQC_CEE_APP_FCOE_SHIFT; + FIELD_GET(I40E_AQC_CEE_APP_FCOE_MASK, app_prio); dcbcfg->app[0].selector = I40E_APP_SEL_ETHTYPE; dcbcfg->app[0].protocolid = I40E_APP_PROTOID_FCOE; /* iSCSI APP */ dcbcfg->app[1].priority = - (app_prio & I40E_AQC_CEE_APP_ISCSI_MASK) >> - I40E_AQC_CEE_APP_ISCSI_SHIFT; + FIELD_GET(I40E_AQC_CEE_APP_ISCSI_MASK, app_prio); dcbcfg->app[1].selector = I40E_APP_SEL_TCPIP; dcbcfg->app[1].protocolid = I40E_APP_PROTOID_ISCSI; /* FIP APP */ dcbcfg->app[2].priority = - (app_prio & I40E_AQC_CEE_APP_FIP_MASK) >> - I40E_AQC_CEE_APP_FIP_SHIFT; + FIELD_GET(I40E_AQC_CEE_APP_FIP_MASK, app_prio); dcbcfg->app[2].selector = I40E_APP_SEL_ETHTYPE; dcbcfg->app[2].protocolid = I40E_APP_PROTOID_FIP; } @@ -673,7 +645,7 @@ static void i40e_cee_to_dcb_config( { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); - u8 i, tc, err, sync, oper; + u8 i, err, sync, oper; /* CEE PG data to ETS config */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc; @@ -682,13 +654,13 @@ static void i40e_cee_to_dcb_config( * from those in the CEE Priority Group sub-TLV. */ for (i = 0; i < 4; i++) { - tc = (u8)((cee_cfg->oper_prio_tc[i] & - I40E_CEE_PGID_PRIO_0_MASK) >> - I40E_CEE_PGID_PRIO_0_SHIFT); - dcbcfg->etscfg.prioritytable[i * 2] = tc; - tc = (u8)((cee_cfg->oper_prio_tc[i] & - I40E_CEE_PGID_PRIO_1_MASK) >> - I40E_CEE_PGID_PRIO_1_SHIFT); + u8 tc; + + tc = FIELD_GET(I40E_CEE_PGID_PRIO_0_MASK, + cee_cfg->oper_prio_tc[i]); + dcbcfg->etscfg.prioritytable[i * 2] = tc; + tc = FIELD_GET(I40E_CEE_PGID_PRIO_1_MASK, + cee_cfg->oper_prio_tc[i]); dcbcfg->etscfg.prioritytable[i * 2 + 1] = tc; } @@ -711,8 +683,7 @@ static void i40e_cee_to_dcb_config( dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; i = 0; - status = (tlv_status & I40E_AQC_CEE_FCOE_STATUS_MASK) >> - I40E_AQC_CEE_FCOE_STATUS_SHIFT; + status = FIELD_GET(I40E_AQC_CEE_FCOE_STATUS_MASK, tlv_status); err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0; sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0; oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0; @@ -720,15 +691,13 @@ static void i40e_cee_to_dcb_config( if (!err && sync && oper) { /* FCoE APP */ dcbcfg->app[i].priority = - (app_prio & I40E_AQC_CEE_APP_FCOE_MASK) >> - I40E_AQC_CEE_APP_FCOE_SHIFT; + FIELD_GET(I40E_AQC_CEE_APP_FCOE_MASK, app_prio); dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE; dcbcfg->app[i].protocolid = I40E_APP_PROTOID_FCOE; i++; } - status = (tlv_status & I40E_AQC_CEE_ISCSI_STATUS_MASK) >> - I40E_AQC_CEE_ISCSI_STATUS_SHIFT; + status = FIELD_GET(I40E_AQC_CEE_ISCSI_STATUS_MASK, tlv_status); err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0; sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0; oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0; @@ -736,15 +705,13 @@ static void i40e_cee_to_dcb_config( if (!err && sync && oper) { /* iSCSI APP */ dcbcfg->app[i].priority = - (app_prio & I40E_AQC_CEE_APP_ISCSI_MASK) >> - I40E_AQC_CEE_APP_ISCSI_SHIFT; + FIELD_GET(I40E_AQC_CEE_APP_ISCSI_MASK, app_prio); dcbcfg->app[i].selector = I40E_APP_SEL_TCPIP; dcbcfg->app[i].protocolid = I40E_APP_PROTOID_ISCSI; i++; } - status = (tlv_status & I40E_AQC_CEE_FIP_STATUS_MASK) >> - I40E_AQC_CEE_FIP_STATUS_SHIFT; + status = FIELD_GET(I40E_AQC_CEE_FIP_STATUS_MASK, tlv_status); err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0; sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0; oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0; @@ -752,8 +719,7 @@ static void i40e_cee_to_dcb_config( if (!err && sync && oper) { /* FIP APP */ dcbcfg->app[i].priority = - (app_prio & I40E_AQC_CEE_APP_FIP_MASK) >> - I40E_AQC_CEE_APP_FIP_SHIFT; + FIELD_GET(I40E_AQC_CEE_APP_FIP_MASK, app_prio); dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE; dcbcfg->app[i].protocolid = I40E_APP_PROTOID_FIP; i++; @@ -1186,7 +1152,7 @@ static void i40e_add_ieee_app_pri_tlv(struct i40e_lldp_org_tlv *tlv, selector = dcbcfg->app[i].selector & 0x7; buf[offset] = (priority << I40E_IEEE_APP_PRIO_SHIFT) | selector; buf[offset + 1] = (dcbcfg->app[i].protocolid >> 0x8) & 0xFF; - buf[offset + 2] = dcbcfg->app[i].protocolid & 0xFF; + buf[offset + 2] = dcbcfg->app[i].protocolid & 0xFF; /* Move to next app */ offset += 3; i++; @@ -1282,8 +1248,7 @@ int i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen, do { i40e_add_dcb_tlv(tlv, dcbcfg, tlvid++); typelength = ntohs(tlv->typelength); - length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> - I40E_LLDP_TLV_LEN_SHIFT); + length = FIELD_GET(I40E_LLDP_TLV_LEN_MASK, typelength); if (length) offset += length + I40E_IEEE_TLV_HEADER_LENGTH; /* END TLV or beyond LLDPDU size */ @@ -1318,20 +1283,16 @@ void i40e_dcb_hw_rx_fifo_config(struct i40e_hw *hw, u32 reg = rd32(hw, I40E_PRTDCB_RETSC); reg &= ~I40E_PRTDCB_RETSC_ETS_MODE_MASK; - reg |= ((u32)ets_mode << I40E_PRTDCB_RETSC_ETS_MODE_SHIFT) & - I40E_PRTDCB_RETSC_ETS_MODE_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RETSC_ETS_MODE_MASK, ets_mode); reg &= ~I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK; - reg |= ((u32)non_ets_mode << I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT) & - I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK, non_ets_mode); reg &= ~I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK; - reg |= (max_exponent << I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT) & - I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK, max_exponent); reg &= ~I40E_PRTDCB_RETSC_LLTC_MASK; - reg |= (lltc_map << I40E_PRTDCB_RETSC_LLTC_SHIFT) & - I40E_PRTDCB_RETSC_LLTC_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RETSC_LLTC_MASK, lltc_map); wr32(hw, I40E_PRTDCB_RETSC, reg); } @@ -1386,14 +1347,12 @@ void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw, */ reg = rd32(hw, I40E_PRT_SWR_PM_THR); reg &= ~I40E_PRT_SWR_PM_THR_THRESHOLD_MASK; - reg |= (threshold << I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT) & - I40E_PRT_SWR_PM_THR_THRESHOLD_MASK; + reg |= FIELD_PREP(I40E_PRT_SWR_PM_THR_THRESHOLD_MASK, threshold); wr32(hw, I40E_PRT_SWR_PM_THR, reg); reg = rd32(hw, I40E_PRTDCB_RPPMC); reg &= ~I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK; - reg |= (fifo_size << I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT) & - I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK, fifo_size); wr32(hw, I40E_PRTDCB_RPPMC, reg); } @@ -1435,19 +1394,17 @@ void i40e_dcb_hw_pfc_config(struct i40e_hw *hw, reg &= ~I40E_PRTDCB_MFLCN_RFCE_MASK; reg &= ~I40E_PRTDCB_MFLCN_RPFCE_MASK; if (pfc_en) { - reg |= BIT(I40E_PRTDCB_MFLCN_RPFCM_SHIFT) & - I40E_PRTDCB_MFLCN_RPFCM_MASK; - reg |= ((u32)pfc_en << I40E_PRTDCB_MFLCN_RPFCE_SHIFT) & - I40E_PRTDCB_MFLCN_RPFCE_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_MFLCN_RPFCM_MASK, 1); + reg |= FIELD_PREP(I40E_PRTDCB_MFLCN_RPFCE_MASK, + pfc_en); } wr32(hw, I40E_PRTDCB_MFLCN, reg); reg = rd32(hw, I40E_PRTDCB_FCCFG); reg &= ~I40E_PRTDCB_FCCFG_TFCE_MASK; if (pfc_en) - reg |= (I40E_DCB_PFC_ENABLED << - I40E_PRTDCB_FCCFG_TFCE_SHIFT) & - I40E_PRTDCB_FCCFG_TFCE_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_FCCFG_TFCE_MASK, + I40E_DCB_PFC_ENABLED); wr32(hw, I40E_PRTDCB_FCCFG, reg); /* FCTTV and FCRTV to be set by default */ @@ -1465,25 +1422,22 @@ void i40e_dcb_hw_pfc_config(struct i40e_hw *hw, reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE); reg &= ~I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK; - reg |= ((u32)pfc_en << - I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT) & - I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK; + reg |= FIELD_PREP(I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK, + pfc_en); wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE, reg); reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE); reg &= ~I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK; - reg |= ((u32)pfc_en << - I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT) & - I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK; + reg |= FIELD_PREP(I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK, + pfc_en); wr32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE, reg); for (i = 0; i < I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX; i++) { reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(i)); reg &= ~I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK; if (pfc_en) { - reg |= ((u32)refresh_time << - I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT) & - I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK; + reg |= FIELD_PREP(I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK, + refresh_time); } wr32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(i), reg); } @@ -1495,14 +1449,12 @@ void i40e_dcb_hw_pfc_config(struct i40e_hw *hw, reg = rd32(hw, I40E_PRTDCB_TC2PFC); reg &= ~I40E_PRTDCB_TC2PFC_TC2PFC_MASK; - reg |= ((u32)tc2pfc << I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT) & - I40E_PRTDCB_TC2PFC_TC2PFC_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_TC2PFC_TC2PFC_MASK, tc2pfc); wr32(hw, I40E_PRTDCB_TC2PFC, reg); reg = rd32(hw, I40E_PRTDCB_RUP); reg &= ~I40E_PRTDCB_RUP_NOVLANUP_MASK; - reg |= ((u32)first_pfc_prio << I40E_PRTDCB_RUP_NOVLANUP_SHIFT) & - I40E_PRTDCB_RUP_NOVLANUP_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RUP_NOVLANUP_MASK, first_pfc_prio); wr32(hw, I40E_PRTDCB_RUP, reg); reg = rd32(hw, I40E_PRTDCB_TDPMC); @@ -1534,8 +1486,7 @@ void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc) u32 reg = rd32(hw, I40E_PRTDCB_GENC); reg &= ~I40E_PRTDCB_GENC_NUMTC_MASK; - reg |= ((u32)num_tc << I40E_PRTDCB_GENC_NUMTC_SHIFT) & - I40E_PRTDCB_GENC_NUMTC_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_GENC_NUMTC_MASK, num_tc); wr32(hw, I40E_PRTDCB_GENC, reg); } @@ -1549,8 +1500,7 @@ u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw) { u32 reg = rd32(hw, I40E_PRTDCB_GENC); - return (u8)((reg & I40E_PRTDCB_GENC_NUMTC_MASK) >> - I40E_PRTDCB_GENC_NUMTC_SHIFT); + return FIELD_GET(I40E_PRTDCB_GENC_NUMTC_MASK, reg); } /** @@ -1574,12 +1524,12 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share, reg &= ~(I40E_PRTDCB_RETSTCC_BWSHARE_MASK | I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK | I40E_PRTDCB_RETSTCC_ETSTC_SHIFT); - reg |= ((u32)bw_share[i] << I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT) & - I40E_PRTDCB_RETSTCC_BWSHARE_MASK; - reg |= ((u32)mode[i] << I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT) & - I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK; - reg |= ((u32)prio_type[i] << I40E_PRTDCB_RETSTCC_ETSTC_SHIFT) & - I40E_PRTDCB_RETSTCC_ETSTC_MASK; + reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_BWSHARE_MASK, + bw_share[i]); + reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK, + mode[i]); + reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_ETSTC_MASK, + prio_type[i]); wr32(hw, I40E_PRTDCB_RETSTCC(i), reg); } } @@ -1719,8 +1669,7 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val < old_val) { reg = rd32(hw, I40E_PRTRPB_SLW); reg &= ~I40E_PRTRPB_SLW_SLW_MASK; - reg |= (new_val << I40E_PRTRPB_SLW_SLW_SHIFT) & - I40E_PRTRPB_SLW_SLW_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SLW_SLW_MASK, new_val); wr32(hw, I40E_PRTRPB_SLW, reg); } @@ -1733,8 +1682,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val < old_val) { reg = rd32(hw, I40E_PRTRPB_SLT(i)); reg &= ~I40E_PRTRPB_SLT_SLT_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_SLT_SLT_TCN_SHIFT) & - I40E_PRTRPB_SLT_SLT_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SLT_SLT_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_SLT(i), reg); } @@ -1743,8 +1692,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val < old_val) { reg = rd32(hw, I40E_PRTRPB_DLW(i)); reg &= ~I40E_PRTRPB_DLW_DLW_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_DLW_DLW_TCN_SHIFT) & - I40E_PRTRPB_DLW_DLW_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_DLW_DLW_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_DLW(i), reg); } } @@ -1755,8 +1704,7 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val < old_val) { reg = rd32(hw, I40E_PRTRPB_SHW); reg &= ~I40E_PRTRPB_SHW_SHW_MASK; - reg |= (new_val << I40E_PRTRPB_SHW_SHW_SHIFT) & - I40E_PRTRPB_SHW_SHW_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SHW_SHW_MASK, new_val); wr32(hw, I40E_PRTRPB_SHW, reg); } @@ -1769,8 +1717,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val < old_val) { reg = rd32(hw, I40E_PRTRPB_SHT(i)); reg &= ~I40E_PRTRPB_SHT_SHT_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_SHT_SHT_TCN_SHIFT) & - I40E_PRTRPB_SHT_SHT_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SHT_SHT_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_SHT(i), reg); } @@ -1779,8 +1727,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val < old_val) { reg = rd32(hw, I40E_PRTRPB_DHW(i)); reg &= ~I40E_PRTRPB_DHW_DHW_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_DHW_DHW_TCN_SHIFT) & - I40E_PRTRPB_DHW_DHW_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_DHW_DHW_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_DHW(i), reg); } } @@ -1790,8 +1738,7 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, new_val = new_pb_cfg->tc_pool_size[i]; reg = rd32(hw, I40E_PRTRPB_DPS(i)); reg &= ~I40E_PRTRPB_DPS_DPS_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_DPS_DPS_TCN_SHIFT) & - I40E_PRTRPB_DPS_DPS_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_DPS_DPS_TCN_MASK, new_val); wr32(hw, I40E_PRTRPB_DPS(i), reg); } @@ -1799,8 +1746,7 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, new_val = new_pb_cfg->shared_pool_size; reg = rd32(hw, I40E_PRTRPB_SPS); reg &= ~I40E_PRTRPB_SPS_SPS_MASK; - reg |= (new_val << I40E_PRTRPB_SPS_SPS_SHIFT) & - I40E_PRTRPB_SPS_SPS_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SPS_SPS_MASK, new_val); wr32(hw, I40E_PRTRPB_SPS, reg); /* Program the shared pool low water mark per port if increasing */ @@ -1809,8 +1755,7 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val > old_val) { reg = rd32(hw, I40E_PRTRPB_SLW); reg &= ~I40E_PRTRPB_SLW_SLW_MASK; - reg |= (new_val << I40E_PRTRPB_SLW_SLW_SHIFT) & - I40E_PRTRPB_SLW_SLW_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SLW_SLW_MASK, new_val); wr32(hw, I40E_PRTRPB_SLW, reg); } @@ -1823,8 +1768,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val > old_val) { reg = rd32(hw, I40E_PRTRPB_SLT(i)); reg &= ~I40E_PRTRPB_SLT_SLT_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_SLT_SLT_TCN_SHIFT) & - I40E_PRTRPB_SLT_SLT_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SLT_SLT_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_SLT(i), reg); } @@ -1833,8 +1778,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val > old_val) { reg = rd32(hw, I40E_PRTRPB_DLW(i)); reg &= ~I40E_PRTRPB_DLW_DLW_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_DLW_DLW_TCN_SHIFT) & - I40E_PRTRPB_DLW_DLW_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_DLW_DLW_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_DLW(i), reg); } } @@ -1845,8 +1790,7 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val > old_val) { reg = rd32(hw, I40E_PRTRPB_SHW); reg &= ~I40E_PRTRPB_SHW_SHW_MASK; - reg |= (new_val << I40E_PRTRPB_SHW_SHW_SHIFT) & - I40E_PRTRPB_SHW_SHW_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SHW_SHW_MASK, new_val); wr32(hw, I40E_PRTRPB_SHW, reg); } @@ -1859,8 +1803,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val > old_val) { reg = rd32(hw, I40E_PRTRPB_SHT(i)); reg &= ~I40E_PRTRPB_SHT_SHT_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_SHT_SHT_TCN_SHIFT) & - I40E_PRTRPB_SHT_SHT_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_SHT_SHT_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_SHT(i), reg); } @@ -1869,8 +1813,8 @@ void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, if (new_val > old_val) { reg = rd32(hw, I40E_PRTRPB_DHW(i)); reg &= ~I40E_PRTRPB_DHW_DHW_TCN_MASK; - reg |= (new_val << I40E_PRTRPB_DHW_DHW_TCN_SHIFT) & - I40E_PRTRPB_DHW_DHW_TCN_MASK; + reg |= FIELD_PREP(I40E_PRTRPB_DHW_DHW_TCN_MASK, + new_val); wr32(hw, I40E_PRTRPB_DHW(i), reg); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 4721845fda6e..b96a92187ab3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -21,8 +21,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay) u32 val; val = rd32(hw, I40E_PRTDCB_GENC); - *delay = (u16)((val & I40E_PRTDCB_GENC_PFCLDA_MASK) >> - I40E_PRTDCB_GENC_PFCLDA_SHIFT); + *delay = FIELD_GET(I40E_PRTDCB_GENC_PFCLDA_MASK, val); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c index cf25bfc5dc3f..2f53f0f53bc3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c @@ -81,8 +81,8 @@ static int i40e_ddp_does_profile_exist(struct i40e_hw *hw, static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new, struct i40e_profile_info *old) { - unsigned int group_id_old = (u8)((old->track_id & 0x00FF0000) >> 16); - unsigned int group_id_new = (u8)((new->track_id & 0x00FF0000) >> 16); + unsigned int group_id_old = FIELD_GET(0x00FF0000, old->track_id); + unsigned int group_id_new = FIELD_GET(0x00FF0000, new->track_id); /* 0x00 group must be only the first */ if (group_id_new == 0) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 26778c448090..c841779713f6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1911,7 +1911,7 @@ static int i40e_get_eeprom(struct net_device *netdev, len = eeprom->len - (I40E_NVM_SECTOR_SIZE * i); last = true; } - offset = eeprom->offset + (I40E_NVM_SECTOR_SIZE * i), + offset = eeprom->offset + (I40E_NVM_SECTOR_SIZE * i); ret_val = i40e_aq_read_nvm(hw, 0x0, offset, len, (u8 *)eeprom_buff + (I40E_NVM_SECTOR_SIZE * i), last, NULL); @@ -1952,9 +1952,8 @@ static int i40e_get_eeprom_len(struct net_device *netdev) val = X722_EEPROM_SCOPE_LIMIT + 1; return val; } - val = (rd32(hw, I40E_GLPCI_LBARCTRL) - & I40E_GLPCI_LBARCTRL_FL_SIZE_MASK) - >> I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT; + val = FIELD_GET(I40E_GLPCI_LBARCTRL_FL_SIZE_MASK, + rd32(hw, I40E_GLPCI_LBARCTRL)); /* register returns value in power of 2, 64Kbyte chunks. */ val = (64 * 1024) * BIT(val); return val; @@ -3284,7 +3283,7 @@ static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp, } else if (valid) { data->flex_word = value & I40E_USERDEF_FLEX_WORD; data->flex_offset = - (value & I40E_USERDEF_FLEX_OFFSET) >> 16; + FIELD_GET(I40E_USERDEF_FLEX_OFFSET, value); data->flex_filter = true; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index dc642efe1cfa..6be281a8727f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1197,11 +1197,9 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) val = rd32(hw, I40E_PRTPM_EEE_STAT); nsd->tx_lpi_status = - (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >> - I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT; + FIELD_GET(I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK, val); nsd->rx_lpi_status = - (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >> - I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT; + FIELD_GET(I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK, val); i40e_stat_update32(hw, I40E_PRTPM_TLPIC, pf->stat_offsets_loaded, &osd->tx_lpi_count, &nsd->tx_lpi_count); @@ -3536,21 +3534,19 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) else return -EINVAL; - qtx_ctl |= (ring->ch->vsi_number << - I40E_QTX_CTL_VFVM_INDX_SHIFT) & - I40E_QTX_CTL_VFVM_INDX_MASK; + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, + ring->ch->vsi_number); } else { if (vsi->type == I40E_VSI_VMDQ2) { qtx_ctl = I40E_QTX_CTL_VM_QUEUE; - qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) & - I40E_QTX_CTL_VFVM_INDX_MASK; + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, + vsi->id); } else { qtx_ctl = I40E_QTX_CTL_PF_QUEUE; } } - qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & - I40E_QTX_CTL_PF_INDX_MASK); + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_PF_INDX_MASK, hw->pf_id); wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl); i40e_flush(hw); @@ -4342,8 +4338,7 @@ static irqreturn_t i40e_intr(int irq, void *data) set_bit(__I40E_RESET_INTR_RECEIVED, pf->state); ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK; val = rd32(hw, I40E_GLGEN_RSTAT); - val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) - >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; + val = FIELD_GET(I40E_GLGEN_RSTAT_RESET_TYPE_MASK, val); if (val == I40E_RESET_CORER) { pf->corer_count++; } else if (val == I40E_RESET_GLOBR) { @@ -5005,8 +5000,8 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) * next_q field of the registers. */ val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1)); - qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) - >> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; + qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, + val); val |= I40E_QUEUE_END_OF_LIST << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val); @@ -5028,8 +5023,8 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) val = rd32(hw, I40E_QINT_TQCTL(qp)); - next = (val & I40E_QINT_TQCTL_NEXTQ_INDX_MASK) - >> I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT; + next = FIELD_GET(I40E_QINT_TQCTL_NEXTQ_INDX_MASK, + val); val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK | I40E_QINT_TQCTL_MSIX0_INDX_MASK | @@ -5047,8 +5042,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) free_irq(pf->pdev->irq, pf); val = rd32(hw, I40E_PFINT_LNKLST0); - qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) - >> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT; + qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, val); val |= I40E_QUEUE_END_OF_LIST << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT; wr32(hw, I40E_PFINT_LNKLST0, val); @@ -9551,18 +9545,18 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, dev_dbg(&pf->pdev->dev, "overflow Rx Queue Number = %d QTX_CTL=0x%08x\n", queue, qtx_ctl); + if (FIELD_GET(I40E_QTX_CTL_PFVF_Q_MASK, qtx_ctl) != + I40E_QTX_CTL_VF_QUEUE) + return; + /* Queue belongs to VF, find the VF and issue VF reset */ - if (((qtx_ctl & I40E_QTX_CTL_PFVF_Q_MASK) - >> I40E_QTX_CTL_PFVF_Q_SHIFT) == I40E_QTX_CTL_VF_QUEUE) { - vf_id = (u16)((qtx_ctl & I40E_QTX_CTL_VFVM_INDX_MASK) - >> I40E_QTX_CTL_VFVM_INDX_SHIFT); - vf_id -= hw->func_caps.vf_base_id; - vf = &pf->vf[vf_id]; - i40e_vc_notify_vf_reset(vf); - /* Allow VF to process pending reset notification */ - msleep(20); - i40e_reset_vf(vf, false); - } + vf_id = FIELD_GET(I40E_QTX_CTL_VFVM_INDX_MASK, qtx_ctl); + vf_id -= hw->func_caps.vf_base_id; + vf = &pf->vf[vf_id]; + i40e_vc_notify_vf_reset(vf); + /* Allow VF to process pending reset notification */ + msleep(20); + i40e_reset_vf(vf, false); } /** @@ -9588,8 +9582,7 @@ u32 i40e_get_current_fd_count(struct i40e_pf *pf) val = rd32(&pf->hw, I40E_PFQF_FDSTAT); fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) + - ((val & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >> - I40E_PFQF_FDSTAT_BEST_CNT_SHIFT); + FIELD_GET(I40E_PFQF_FDSTAT_BEST_CNT_MASK, val); return fcnt_prog; } @@ -9603,8 +9596,7 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf) val = rd32(&pf->hw, I40E_GLQF_FDCNT_0); fcnt_prog = (val & I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK) + - ((val & I40E_GLQF_FDCNT_0_BESTCNT_MASK) >> - I40E_GLQF_FDCNT_0_BESTCNT_SHIFT); + FIELD_GET(I40E_GLQF_FDCNT_0_BESTCNT_MASK, val); return fcnt_prog; } @@ -11186,14 +11178,10 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { - u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> - I40E_GL_MDET_TX_PF_NUM_SHIFT; - u16 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >> - I40E_GL_MDET_TX_VF_NUM_SHIFT; - u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> - I40E_GL_MDET_TX_EVENT_SHIFT; - u16 queue = ((reg & I40E_GL_MDET_TX_QUEUE_MASK) >> - I40E_GL_MDET_TX_QUEUE_SHIFT) - + u8 pf_num = FIELD_GET(I40E_GL_MDET_TX_PF_NUM_MASK, reg); + u16 vf_num = FIELD_GET(I40E_GL_MDET_TX_VF_NUM_MASK, reg); + u8 event = FIELD_GET(I40E_GL_MDET_TX_EVENT_MASK, reg); + u16 queue = FIELD_GET(I40E_GL_MDET_TX_QUEUE_MASK, reg) - pf->hw.func_caps.base_queue; if (netif_msg_tx_err(pf)) dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n", @@ -11203,12 +11191,9 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { - u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> - I40E_GL_MDET_RX_FUNCTION_SHIFT; - u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> - I40E_GL_MDET_RX_EVENT_SHIFT; - u16 queue = ((reg & I40E_GL_MDET_RX_QUEUE_MASK) >> - I40E_GL_MDET_RX_QUEUE_SHIFT) - + u8 func = FIELD_GET(I40E_GL_MDET_RX_FUNCTION_MASK, reg); + u8 event = FIELD_GET(I40E_GL_MDET_RX_EVENT_MASK, reg); + u16 queue = FIELD_GET(I40E_GL_MDET_RX_QUEUE_MASK, reg) - pf->hw.func_caps.base_queue; if (netif_msg_rx_err(pf)) dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n", @@ -16172,8 +16157,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* make sure the MFS hasn't been set lower than the default */ #define MAX_FRAME_SIZE_DEFAULT 0x2600 - val = (rd32(&pf->hw, I40E_PRTGL_SAH) & - I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; + val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK, + rd32(&pf->hw, I40E_PRTGL_SAH)); if (val < MAX_FRAME_SIZE_DEFAULT) dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", pf->hw.port, val); diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 62eb34871094..605fd82f5d20 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include <linux/bitfield.h> #include <linux/delay.h> #include "i40e_alloc.h" #include "i40e_prototype.h" @@ -26,8 +27,7 @@ int i40e_init_nvm(struct i40e_hw *hw) * as the blank mode may be used in the factory line. */ gens = rd32(hw, I40E_GLNVM_GENS); - sr_size = ((gens & I40E_GLNVM_GENS_SR_SIZE_MASK) >> - I40E_GLNVM_GENS_SR_SIZE_SHIFT); + sr_size = FIELD_GET(I40E_GLNVM_GENS_SR_SIZE_MASK, gens); /* Switching to words (sr_size contains power of 2KB) */ nvm->sr_size = BIT(sr_size) * I40E_SR_WORDS_IN_1KB; @@ -193,9 +193,8 @@ static int i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset, ret_code = i40e_poll_sr_srctl_done_bit(hw); if (!ret_code) { sr_reg = rd32(hw, I40E_GLNVM_SRDATA); - *data = (u16)((sr_reg & - I40E_GLNVM_SRDATA_RDDATA_MASK) - >> I40E_GLNVM_SRDATA_RDDATA_SHIFT); + *data = FIELD_GET(I40E_GLNVM_SRDATA_RDDATA_MASK, + sr_reg); } } if (ret_code) @@ -771,13 +770,12 @@ static inline u8 i40e_nvmupd_get_module(u32 val) } static inline u8 i40e_nvmupd_get_transaction(u32 val) { - return (u8)((val & I40E_NVM_TRANS_MASK) >> I40E_NVM_TRANS_SHIFT); + return FIELD_GET(I40E_NVM_TRANS_MASK, val); } static inline u8 i40e_nvmupd_get_preservation_flags(u32 val) { - return (u8)((val & I40E_NVM_PRESERVATION_FLAGS_MASK) >> - I40E_NVM_PRESERVATION_FLAGS_SHIFT); + return FIELD_GET(I40E_NVM_PRESERVATION_FLAGS_MASK, val); } static const char * const i40e_nvm_update_state_str[] = { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 1cf993a79438..e7ebcb09f23c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -1480,8 +1480,8 @@ void i40e_ptp_init(struct i40e_pf *pf) /* Only one PF is assigned to control 1588 logic per port. Do not * enable any support for PFs not assigned via PRTTSYN_CTL0.PF_ID */ - pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >> - I40E_PRTTSYN_CTL0_PF_ID_SHIFT; + pf_id = FIELD_GET(I40E_PRTTSYN_CTL0_PF_ID_MASK, + rd32(hw, I40E_PRTTSYN_CTL0)); if (hw->pf_id != pf_id) { clear_bit(I40E_FLAG_PTP_ENA, pf->flags); dev_info(&pf->pdev->dev, "%s: PTP not supported on %s\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 2e1eaca44343..14ab642cafdb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -207,7 +207,7 @@ #define I40E_GLGEN_MSCA_OPCODE_SHIFT 26 #define I40E_GLGEN_MSCA_OPCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_OPCODE_SHIFT) #define I40E_GLGEN_MSCA_STCODE_SHIFT 28 -#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_STCODE_SHIFT) +#define I40E_GLGEN_MSCA_STCODE_MASK(_i) I40E_MASK(_i, I40E_GLGEN_MSCA_STCODE_SHIFT) #define I40E_GLGEN_MSCA_MDICMD_SHIFT 30 #define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT) #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31 diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b82df5bdfac0..971ba3322038 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -33,19 +33,16 @@ static void i40e_fdir(struct i40e_ring *tx_ring, i++; tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; - flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK & - (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT); + flex_ptype = FIELD_PREP(I40E_TXD_FLTR_QW0_QINDEX_MASK, fdata->q_index); - flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK & - (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT); + flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_FLEXOFF_MASK, + fdata->flex_off); - flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & - (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); + flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_PCTYPE_MASK, fdata->pctype); /* Use LAN VSI Id if not programmed by user */ - flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK & - ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT); + flex_ptype |= FIELD_PREP(I40E_TXD_FLTR_QW0_DEST_VSI_MASK, + fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id); dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; @@ -55,17 +52,15 @@ static void i40e_fdir(struct i40e_ring *tx_ring, I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << I40E_TXD_FLTR_QW1_PCMD_SHIFT; - dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK & - (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT); + dtype_cmd |= FIELD_PREP(I40E_TXD_FLTR_QW1_DEST_MASK, fdata->dest_ctl); - dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK & - (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT); + dtype_cmd |= FIELD_PREP(I40E_TXD_FLTR_QW1_FD_STATUS_MASK, + fdata->fd_status); if (fdata->cnt_index) { dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK & - ((u32)fdata->cnt_index << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT); + dtype_cmd |= FIELD_PREP(I40E_TXD_FLTR_QW1_CNTINDEX_MASK, + fdata->cnt_index); } fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -691,8 +686,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw, u32 error; qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw; - error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> - I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; + error = FIELD_GET(I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK, qword1); if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) { pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id); @@ -1403,8 +1397,7 @@ void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw, { u8 id; - id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> - I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; + id = FIELD_GET(I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK, qword1); if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS) i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id); @@ -1764,11 +1757,9 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u64 qword; qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> - I40E_RXD_QW1_ERROR_SHIFT; - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> - I40E_RXD_QW1_STATUS_SHIFT; + ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword); + rx_error = FIELD_GET(I40E_RXD_QW1_ERROR_MASK, qword); + rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword); decoded = decode_rx_desc_ptype(ptype); skb->ip_summed = CHECKSUM_NONE; @@ -1901,13 +1892,10 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, struct sk_buff *skb) { u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> - I40E_RXD_QW1_STATUS_SHIFT; + u32 rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword); u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK; - u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> - I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT; - u8 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> - I40E_RXD_QW1_PTYPE_SHIFT; + u32 tsyn = FIELD_GET(I40E_RXD_QW1_STATUS_TSYNINDX_MASK, rx_status); + u8 rx_ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword); if (unlikely(tsynvalid)) i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn); @@ -2554,8 +2542,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, continue; } - size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + size = FIELD_GET(I40E_RXD_QW1_LENGTH_PBUF_MASK, qword); if (!size) break; @@ -2959,8 +2946,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, i++; tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; - flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & - I40E_TXD_FLTR_QW0_QINDEX_MASK; + flex_ptype = FIELD_PREP(I40E_TXD_FLTR_QW0_QINDEX_MASK, + tx_ring->queue_index); flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : @@ -2986,14 +2973,12 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) dtype_cmd |= - ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + FIELD_PREP(I40E_TXD_FLTR_QW1_CNTINDEX_MASK, + I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)); else dtype_cmd |= - ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + FIELD_PREP(I40E_TXD_FLTR_QW1_CNTINDEX_MASK, + I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)); if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags)) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; @@ -3601,8 +3586,7 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; - td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> - I40E_TX_FLAGS_VLAN_SHIFT; + td_tag = FIELD_GET(I40E_TX_FLAGS_VLAN_MASK, tx_flags); } first->tx_flags = tx_flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index de69c2e22448..d9031499697e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -37,11 +37,11 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); #define I40E_QTX_CTL_VM_QUEUE 0x1 #define I40E_QTX_CTL_PF_QUEUE 0x2 -#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK +#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK(1) #define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1) #define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(2) -#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK +#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_GLGEN_MSCA_STCODE_MASK(0) #define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_GLGEN_MSCA_OPCODE_MASK(0) #define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_GLGEN_MSCA_OPCODE_MASK(1) #define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_GLGEN_MSCA_OPCODE_MASK(3) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 37cca484abb8..d55a7cd9fa52 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -474,10 +474,10 @@ static void i40e_release_rdma_qvlist(struct i40e_vf *vf) */ reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx; reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx)); - next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK) - >> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT; - next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK) - >> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT; + next_q_index = FIELD_GET(I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK, + reg); + next_q_type = FIELD_GET(I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK, + reg); reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1); reg = (next_q_index & @@ -555,10 +555,10 @@ i40e_config_rdma_qvlist(struct i40e_vf *vf, * queue on top. Also link it with the new queue in CEQCTL. */ reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx)); - next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >> - I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT); - next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >> - I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); + next_q_idx = FIELD_GET(I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK, + reg); + next_q_type = FIELD_GET(I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK, + reg); if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) { reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx; @@ -659,11 +659,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, /* associate this queue with the PCI VF function */ qtx_ctl = I40E_QTX_CTL_VF_QUEUE; - qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) - & I40E_QTX_CTL_PF_INDX_MASK); - qtx_ctl |= (((vf->vf_id + hw->func_caps.vf_base_id) - << I40E_QTX_CTL_VFVM_INDX_SHIFT) - & I40E_QTX_CTL_VFVM_INDX_MASK); + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_PF_INDX_MASK, hw->pf_id); + qtx_ctl |= FIELD_PREP(I40E_QTX_CTL_VFVM_INDX_MASK, + vf->vf_id + hw->func_caps.vf_base_id); wr32(hw, I40E_QTX_CTL(pf_queue_id), qtx_ctl); i40e_flush(hw); @@ -2581,6 +2579,14 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) int aq_ret = 0; int i; + if (vf->is_disabled_from_host) { + aq_ret = -EPERM; + dev_info(&pf->pdev->dev, + "Admin has disabled VF %d, will not enable queues\n", + vf->vf_id); + goto error_param; + } + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = -EINVAL; goto error_param; @@ -4675,9 +4681,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, ivi->max_tx_rate = vf->tx_rate; ivi->min_tx_rate = 0; - ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK; - ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >> - I40E_VLAN_PRIORITY_SHIFT; + ivi->vlan = le16_get_bits(vsi->info.pvid, I40E_VLAN_MASK); + ivi->qos = le16_get_bits(vsi->info.pvid, I40E_PRIORITY_MASK); if (vf->link_forced == false) ivi->linkstate = IFLA_VF_LINK_STATE_AUTO; else if (vf->link_up == true) @@ -4708,9 +4713,12 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) struct i40e_link_status *ls = &pf->hw.phy.link_info; struct virtchnl_pf_event pfe; struct i40e_hw *hw = &pf->hw; + struct i40e_vsi *vsi; + unsigned long q_map; struct i40e_vf *vf; int abs_vf_id; int ret = 0; + int tmp; if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); @@ -4733,17 +4741,38 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) switch (link) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; + vf->is_disabled_from_host = false; + /* reset needed to reinit VF resources */ + i40e_vc_reset_vf(vf, true); i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; vf->link_up = true; + vf->is_disabled_from_host = false; + /* reset needed to reinit VF resources */ + i40e_vc_reset_vf(vf, true); i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; vf->link_up = false; i40e_set_vf_link_state(vf, &pfe, ls); + + vsi = pf->vsi[vf->lan_vsi_idx]; + q_map = BIT(vsi->num_queue_pairs) - 1; + + vf->is_disabled_from_host = true; + + /* Try to stop both Tx&Rx rings even if one of the calls fails + * to ensure we stop the rings even in case of errors. + * If any of them returns with an error then the first + * error that occurred will be returned. + */ + tmp = i40e_ctrl_vf_tx_rings(vsi, q_map, false); + ret = i40e_ctrl_vf_rx_rings(vsi, q_map, false); + + ret = tmp ? tmp : ret; break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 2ee0f8a23248..c36580b7b2c1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -100,6 +100,7 @@ struct i40e_vf { bool link_forced; bool link_up; /* only valid if VF link is forced */ bool spoofchk; + bool is_disabled_from_host; /* PF ctrl of VF enable/disable */ u16 num_vlan; /* ADq related variables */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index e99fa854d17f..af7d5fa6cdc1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -476,8 +476,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) continue; } - size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + size = FIELD_GET(I40E_RXD_QW1_LENGTH_PBUF_MASK, qword); if (!size) break; diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 89d2bce529ae..5a25233a89d5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include <linux/avf/virtchnl.h> +#include <linux/bitfield.h> #include "iavf_type.h" #include "iavf_adminq.h" #include "iavf_prototype.h" -#include <linux/avf/virtchnl.h> /** * iavf_aq_str - convert AQ err code to a string @@ -330,6 +331,7 @@ static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, struct iavf_aq_desc desc; struct iavf_aqc_get_set_rss_lut *cmd_resp = (struct iavf_aqc_get_set_rss_lut *)&desc.params.raw; + u16 flags; if (set) iavf_fill_default_direct_cmd_desc(&desc, @@ -342,22 +344,18 @@ static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF); desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD); - cmd_resp->vsi_id = - cpu_to_le16((u16)((vsi_id << - IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) & - IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_LUT_VSI_VALID); + vsi_id = FIELD_PREP(IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK, vsi_id) | + FIELD_PREP(IAVF_AQC_SET_RSS_LUT_VSI_VALID, 1); + cmd_resp->vsi_id = cpu_to_le16(vsi_id); if (pf_lut) - cmd_resp->flags |= cpu_to_le16((u16) - ((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF << - IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + flags = FIELD_PREP(IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK, + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF); else - cmd_resp->flags |= cpu_to_le16((u16) - ((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI << - IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + flags = FIELD_PREP(IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK, + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI); + + cmd_resp->flags = cpu_to_le16(flags); status = iavf_asq_send_command(hw, &desc, lut, lut_size, NULL); @@ -411,11 +409,9 @@ iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF); desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD); - cmd_resp->vsi_id = - cpu_to_le16((u16)((vsi_id << - IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) & - IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_KEY_VSI_VALID); + vsi_id = FIELD_PREP(IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK, vsi_id) | + FIELD_PREP(IAVF_AQC_SET_RSS_KEY_VSI_VALID, 1); + cmd_resp->vsi_id = cpu_to_le16(vsi_id); status = iavf_asq_send_command(hw, &desc, key, key_size, NULL); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index cdb4849f5db4..378c3e9ddf9d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include <linux/bitfield.h> +#include <linux/uaccess.h> + /* ethtool support for iavf */ #include "iavf.h" -#include <linux/uaccess.h> - /* ethtool statistics helpers */ /** @@ -1016,8 +1017,7 @@ iavf_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp, #define IAVF_USERDEF_FLEX_MAX_OFFS_VAL 504 flex = &fltr->flex_words[cnt++]; flex->word = value & IAVF_USERDEF_FLEX_WORD_M; - flex->offset = (value & IAVF_USERDEF_FLEX_OFFS_M) >> - IAVF_USERDEF_FLEX_OFFS_S; + flex->offset = FIELD_GET(IAVF_USERDEF_FLEX_OFFS_M, value); if (flex->offset > IAVF_USERDEF_FLEX_MAX_OFFS_VAL) return -EINVAL; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c index 03e774bd2a5b..2d47b0b4640e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c @@ -3,6 +3,7 @@ /* flow director ethtool support for iavf */ +#include <linux/bitfield.h> #include "iavf.h" #define GTPU_PORT 2152 @@ -357,7 +358,7 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr, if (fltr->ip_mask.tclass == U8_MAX) { iph->priority = (fltr->ip_data.tclass >> 4) & 0xF; - iph->flow_lbl[0] = (fltr->ip_data.tclass << 4) & 0xF0; + iph->flow_lbl[0] = FIELD_PREP(0xF0, fltr->ip_data.tclass); VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, TC); } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index d64c4997136b..b71484c87a84 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include <linux/bitfield.h> #include <linux/prefetch.h> #include "iavf.h" @@ -988,11 +989,9 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi, u64 qword; qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT; - rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >> - IAVF_RXD_QW1_ERROR_SHIFT; - rx_status = (qword & IAVF_RXD_QW1_STATUS_MASK) >> - IAVF_RXD_QW1_STATUS_SHIFT; + ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); + rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword); + rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword); decoded = decode_rx_desc_ptype(ptype); skb->ip_summed = CHECKSUM_NONE; @@ -1533,8 +1532,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD)) break; - size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> - IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; + size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword); iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); rx_buffer = iavf_get_rx_buffer(rx_ring, size); @@ -1581,8 +1579,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) total_rx_bytes += skb->len; qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> - IAVF_RXD_QW1_PTYPE_SHIFT; + rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); /* populate checksum, VLAN, and protocol */ iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); @@ -2290,8 +2287,7 @@ static void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, if (tx_flags & IAVF_TX_FLAGS_HW_VLAN) { td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1; - td_tag = (tx_flags & IAVF_TX_FLAGS_VLAN_MASK) >> - IAVF_TX_FLAGS_VLAN_SHIFT; + td_tag = FIELD_GET(IAVF_TX_FLAGS_VLAN_MASK, tx_flags); } first->tx_flags = tx_flags; @@ -2467,8 +2463,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, if (tx_flags & IAVF_TX_FLAGS_HW_OUTER_SINGLE_VLAN) { cd_type_cmd_tso_mss |= IAVF_TX_CTX_DESC_IL2TAG2 << IAVF_TXD_CTX_QW1_CMD_SHIFT; - cd_l2tag2 = (tx_flags & IAVF_TX_FLAGS_VLAN_MASK) >> - IAVF_TX_FLAGS_VLAN_SHIFT; + cd_l2tag2 = FIELD_GET(IAVF_TX_FLAGS_VLAN_MASK, tx_flags); } /* obtain protocol of skb */ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2defac6d9168..367b613d92c0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -518,7 +518,6 @@ enum ice_pf_flags { }; enum ice_misc_thread_tasks { - ICE_MISC_THREAD_EXTTS_EVENT, ICE_MISC_THREAD_TX_TSTAMP, ICE_MISC_THREAD_NBITS /* must be last */ }; @@ -609,6 +608,7 @@ struct ice_pf { u32 hw_csum_rx_error; u32 oicr_err_reg; struct msi_map oicr_irq; /* Other interrupt cause MSIX vector */ + struct msi_map ll_ts_irq; /* LL_TS interrupt MSIX vector */ u16 max_pf_txqs; /* Total Tx queues PF wide */ u16 max_pf_rxqs; /* Total Rx queues PF wide */ u16 num_lan_msix; /* Total MSIX vectors for base driver */ @@ -633,6 +633,7 @@ struct ice_pf { unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; char int_name[ICE_INT_NAME_STR_LEN]; + char int_name_ll_ts[ICE_INT_NAME_STR_LEN]; struct auxiliary_device *adev; int aux_idx; u32 sw_int_count; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 12c510bb1d9b..6a5e974a1776 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -422,10 +422,10 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID BIT(2) #define ICE_AQ_VSI_INNER_VLAN_EMODE_S 3 #define ICE_AQ_VSI_INNER_VLAN_EMODE_M (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH (0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP (0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR (0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) -#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING (0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S) +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH 0x0U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP 0x1U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR 0x2U +#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING 0x3U u8 inner_vlan_reserved2[3]; /* ingress egress up sections */ __le32 ingress_table; /* bitmap, 3 bits per up */ @@ -491,7 +491,7 @@ struct ice_aqc_vsi_props { #define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S 2 #define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M (0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S) #define ICE_AQ_VSI_Q_OPT_RSS_HASH_S 6 -#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M (0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) +#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M GENMASK(7, 6) #define ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ 0x0U #define ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ 0x1U #define ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR 0x2U diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index b25b7f415965..533b923cae2d 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -232,24 +232,16 @@ static void ice_cfg_itr_gran(struct ice_hw *hw) /* no need to update global register if ITR gran is already set */ if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) && - (((regval & GLINT_CTL_ITR_GRAN_200_M) >> - GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_100_M) >> - GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_50_M) >> - GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) && - (((regval & GLINT_CTL_ITR_GRAN_25_M) >> - GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US)) + (FIELD_GET(GLINT_CTL_ITR_GRAN_200_M, regval) == ICE_ITR_GRAN_US) && + (FIELD_GET(GLINT_CTL_ITR_GRAN_100_M, regval) == ICE_ITR_GRAN_US) && + (FIELD_GET(GLINT_CTL_ITR_GRAN_50_M, regval) == ICE_ITR_GRAN_US) && + (FIELD_GET(GLINT_CTL_ITR_GRAN_25_M, regval) == ICE_ITR_GRAN_US)) return; - regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) & - GLINT_CTL_ITR_GRAN_200_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) & - GLINT_CTL_ITR_GRAN_100_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) & - GLINT_CTL_ITR_GRAN_50_M) | - ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) & - GLINT_CTL_ITR_GRAN_25_M); + regval = FIELD_PREP(GLINT_CTL_ITR_GRAN_200_M, ICE_ITR_GRAN_US) | + FIELD_PREP(GLINT_CTL_ITR_GRAN_100_M, ICE_ITR_GRAN_US) | + FIELD_PREP(GLINT_CTL_ITR_GRAN_50_M, ICE_ITR_GRAN_US) | + FIELD_PREP(GLINT_CTL_ITR_GRAN_25_M, ICE_ITR_GRAN_US); wr32(hw, GLINT_CTL, regval); } @@ -286,7 +278,7 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 */ static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring) { - struct ice_vsi *vsi = ring->vsi; + const struct ice_vsi *vsi = ring->vsi; int i; ice_for_each_txq(vsi, i) { @@ -936,10 +928,10 @@ ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) struct ice_hw *hw = &pf->hw; u32 val; - itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + itr_idx = FIELD_PREP(QINT_TQCTL_ITR_INDX_M, itr_idx); val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + FIELD_PREP(QINT_TQCTL_MSIX_INDX_M, msix_idx); wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); if (ice_is_xdp_ena_vsi(vsi)) { @@ -968,10 +960,10 @@ ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) struct ice_hw *hw = &pf->hw; u32 val; - itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; + itr_idx = FIELD_PREP(QINT_RQCTL_ITR_INDX_M, itr_idx); val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); + FIELD_PREP(QINT_RQCTL_MSIX_INDX_M, msix_idx); wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); @@ -983,7 +975,7 @@ ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) * @hw: pointer to the HW structure * @q_vector: interrupt vector to trigger the software interrupt for */ -void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) +void ice_trigger_sw_intr(struct ice_hw *hw, const struct ice_q_vector *q_vector) { wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | @@ -1058,7 +1050,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, * are needed for stopping Tx queue */ void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, +ice_fill_txq_meta(const struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { struct ice_channel *ch = ring->ch; diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h index b67dca417acb..17321ba75602 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.h +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -22,12 +22,12 @@ void ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); void ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); -void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); +void ice_trigger_sw_intr(struct ice_hw *hw, const struct ice_q_vector *q_vector); int ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, +ice_fill_txq_meta(const struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); #endif /* _ICE_BASE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 08a1f699a34f..d3c7e519d29b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -942,9 +942,8 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) */ static void ice_get_itr_intrl_gran(struct ice_hw *hw) { - u8 max_agg_bw = (rd32(hw, GL_PWR_MODE_CTL) & - GL_PWR_MODE_CTL_CAR_MAX_BW_M) >> - GL_PWR_MODE_CTL_CAR_MAX_BW_S; + u8 max_agg_bw = FIELD_GET(GL_PWR_MODE_CTL_CAR_MAX_BW_M, + rd32(hw, GL_PWR_MODE_CTL)); switch (max_agg_bw) { case ICE_MAX_AGG_BW_200G: @@ -976,9 +975,7 @@ int ice_init_hw(struct ice_hw *hw) if (status) return status; - hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) & - PF_FUNC_RID_FUNC_NUM_M) >> - PF_FUNC_RID_FUNC_NUM_S; + hw->pf_id = FIELD_GET(PF_FUNC_RID_FUNC_NUM_M, rd32(hw, PF_FUNC_RID)); status = ice_reset(hw, ICE_RESET_PFR); if (status) @@ -1163,8 +1160,8 @@ int ice_check_reset(struct ice_hw *hw) * or EMPR has occurred. The grst delay value is in 100ms units. * Add 1sec for outstanding AQ commands that can take a long time. */ - grst_timeout = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >> - GLGEN_RSTCTL_GRSTDEL_S) + 10; + grst_timeout = FIELD_GET(GLGEN_RSTCTL_GRSTDEL_M, + rd32(hw, GLGEN_RSTCTL)) + 10; for (cnt = 0; cnt < grst_timeout; cnt++) { mdelay(100); @@ -2248,7 +2245,7 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0); info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0); - info->clk_freq = (number & ICE_TS_CLK_FREQ_M) >> ICE_TS_CLK_FREQ_S; + info->clk_freq = FIELD_GET(ICE_TS_CLK_FREQ_M, number); info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0); if (info->clk_freq < NUM_ICE_TIME_REF_FREQ) { @@ -2449,11 +2446,12 @@ ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, info->tmr0_owned = ((number & ICE_TS_TMR0_OWND_M) != 0); info->tmr0_ena = ((number & ICE_TS_TMR0_ENA_M) != 0); - info->tmr1_owner = (number & ICE_TS_TMR1_OWNR_M) >> ICE_TS_TMR1_OWNR_S; + info->tmr1_owner = FIELD_GET(ICE_TS_TMR1_OWNR_M, number); info->tmr1_owned = ((number & ICE_TS_TMR1_OWND_M) != 0); info->tmr1_ena = ((number & ICE_TS_TMR1_ENA_M) != 0); info->ts_ll_read = ((number & ICE_TS_LL_TX_TS_READ_M) != 0); + info->ts_ll_int_read = ((number & ICE_TS_LL_TX_TS_INT_READ_M) != 0); info->ena_ports = logical_id; info->tmr_own_map = phys_id; @@ -2474,6 +2472,8 @@ ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, info->tmr1_ena); ice_debug(hw, ICE_DBG_INIT, "dev caps: ts_ll_read = %u\n", info->ts_ll_read); + ice_debug(hw, ICE_DBG_INIT, "dev caps: ts_ll_int_read = %u\n", + info->ts_ll_int_read); ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 ena_ports = %u\n", info->ena_ports); ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr_own_map = %u\n", @@ -3884,6 +3884,7 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, { struct ice_aqc_sff_eeprom *cmd; struct ice_aq_desc desc; + u16 i2c_bus_addr; int status; if (!data || (mem_addr & 0xff00)) @@ -3894,15 +3895,13 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); cmd->lport_num = (u8)(lport & 0xff); cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); - cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & - ICE_AQC_SFF_I2CBUS_7BIT_M) | - ((set_page << - ICE_AQC_SFF_SET_EEPROM_PAGE_S) & - ICE_AQC_SFF_SET_EEPROM_PAGE_M)); - cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff); - cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S); + i2c_bus_addr = FIELD_PREP(ICE_AQC_SFF_I2CBUS_7BIT_M, bus_addr >> 1) | + FIELD_PREP(ICE_AQC_SFF_SET_EEPROM_PAGE_M, set_page); if (write) - cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE); + i2c_bus_addr |= ICE_AQC_SFF_IS_WRITE; + cmd->i2c_bus_addr = cpu_to_le16(i2c_bus_addr); + cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff); + cmd->eeprom_page = le16_encode_bits(page, ICE_AQC_SFF_EEPROM_PAGE_M); status = ice_aq_send_cmd(hw, &desc, data, length, cd); return status; @@ -4157,6 +4156,7 @@ ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps, struct ice_aqc_dis_txq_item *item; struct ice_aqc_dis_txqs *cmd; struct ice_aq_desc desc; + u16 vmvf_and_timeout; u16 i, sz = 0; int status; @@ -4172,27 +4172,26 @@ ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps, cmd->num_entries = num_qgrps; - cmd->vmvf_and_timeout = cpu_to_le16((5 << ICE_AQC_Q_DIS_TIMEOUT_S) & - ICE_AQC_Q_DIS_TIMEOUT_M); + vmvf_and_timeout = FIELD_PREP(ICE_AQC_Q_DIS_TIMEOUT_M, 5); switch (rst_src) { case ICE_VM_RESET: cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VM_RESET; - cmd->vmvf_and_timeout |= - cpu_to_le16(vmvf_num & ICE_AQC_Q_DIS_VMVF_NUM_M); + vmvf_and_timeout |= vmvf_num & ICE_AQC_Q_DIS_VMVF_NUM_M; break; case ICE_VF_RESET: cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VF_RESET; /* In this case, FW expects vmvf_num to be absolute VF ID */ - cmd->vmvf_and_timeout |= - cpu_to_le16((vmvf_num + hw->func_caps.vf_base_id) & - ICE_AQC_Q_DIS_VMVF_NUM_M); + vmvf_and_timeout |= (vmvf_num + hw->func_caps.vf_base_id) & + ICE_AQC_Q_DIS_VMVF_NUM_M; break; case ICE_NO_RESET: default: break; } + cmd->vmvf_and_timeout = cpu_to_le16(vmvf_and_timeout); + /* flush pipe on time out */ cmd->cmd_type |= ICE_AQC_Q_DIS_CMD_FLUSH_PIPE; /* If no queue group info, we are in a reset flow. Issue the AQ */ @@ -4267,10 +4266,8 @@ ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, cmd->cmd_type = ICE_AQC_Q_CFG_TC_CHNG; cmd->num_qs = num_qs; cmd->port_num_chng = (oldport & ICE_AQC_Q_CFG_SRC_PRT_M); - cmd->port_num_chng |= (newport << ICE_AQC_Q_CFG_DST_PRT_S) & - ICE_AQC_Q_CFG_DST_PRT_M; - cmd->time_out = (5 << ICE_AQC_Q_CFG_TIMEOUT_S) & - ICE_AQC_Q_CFG_TIMEOUT_M; + cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_DST_PRT_M, newport); + cmd->time_out = FIELD_PREP(ICE_AQC_Q_CFG_TIMEOUT_M, 5); cmd->blocked_cgds = 0; status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); @@ -5776,7 +5773,7 @@ ice_get_link_default_override(struct ice_link_default_override_tlv *ldo, ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n"); return status; } - ldo->options = buf & ICE_LINK_OVERRIDE_OPT_M; + ldo->options = FIELD_GET(ICE_LINK_OVERRIDE_OPT_M, buf); ldo->phy_config = (buf & ICE_LINK_OVERRIDE_PHY_CFG_M) >> ICE_LINK_OVERRIDE_PHY_CFG_S; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 396e555023aa..74418c445cc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -35,8 +35,7 @@ ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf, ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_get_mib); cmd->type = mib_type & ICE_AQ_LLDP_MIB_TYPE_M; - cmd->type |= (bridge_type << ICE_AQ_LLDP_BRID_TYPE_S) & - ICE_AQ_LLDP_BRID_TYPE_M; + cmd->type |= FIELD_PREP(ICE_AQ_LLDP_BRID_TYPE_M, bridge_type); desc.datalen = cpu_to_le16(buf_size); @@ -147,8 +146,7 @@ static u8 ice_get_dcbx_status(struct ice_hw *hw) u32 reg; reg = rd32(hw, PRTDCB_GENS); - return (u8)((reg & PRTDCB_GENS_DCBX_STATUS_M) >> - PRTDCB_GENS_DCBX_STATUS_S); + return FIELD_GET(PRTDCB_GENS_DCBX_STATUS_M, reg); } /** @@ -174,11 +172,9 @@ ice_parse_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg) */ for (i = 0; i < 4; i++) { ets_cfg->prio_table[i * 2] = - ((buf[offset] & ICE_IEEE_ETS_PRIO_1_M) >> - ICE_IEEE_ETS_PRIO_1_S); + FIELD_GET(ICE_IEEE_ETS_PRIO_1_M, buf[offset]); ets_cfg->prio_table[i * 2 + 1] = - ((buf[offset] & ICE_IEEE_ETS_PRIO_0_M) >> - ICE_IEEE_ETS_PRIO_0_S); + FIELD_GET(ICE_IEEE_ETS_PRIO_0_M, buf[offset]); offset++; } @@ -222,11 +218,9 @@ ice_parse_ieee_etscfg_tlv(struct ice_lldp_org_tlv *tlv, * |1bit | 1bit|3 bits|3bits| */ etscfg = &dcbcfg->etscfg; - etscfg->willing = ((buf[0] & ICE_IEEE_ETS_WILLING_M) >> - ICE_IEEE_ETS_WILLING_S); - etscfg->cbs = ((buf[0] & ICE_IEEE_ETS_CBS_M) >> ICE_IEEE_ETS_CBS_S); - etscfg->maxtcs = ((buf[0] & ICE_IEEE_ETS_MAXTC_M) >> - ICE_IEEE_ETS_MAXTC_S); + etscfg->willing = FIELD_GET(ICE_IEEE_ETS_WILLING_M, buf[0]); + etscfg->cbs = FIELD_GET(ICE_IEEE_ETS_CBS_M, buf[0]); + etscfg->maxtcs = FIELD_GET(ICE_IEEE_ETS_MAXTC_M, buf[0]); /* Begin parsing at Priority Assignment Table (offset 1 in buf) */ ice_parse_ieee_ets_common_tlv(&buf[1], etscfg); @@ -268,11 +262,9 @@ ice_parse_ieee_pfccfg_tlv(struct ice_lldp_org_tlv *tlv, * ----------------------------------------- * |1bit | 1bit|2 bits|4bits| 1 octet | */ - dcbcfg->pfc.willing = ((buf[0] & ICE_IEEE_PFC_WILLING_M) >> - ICE_IEEE_PFC_WILLING_S); - dcbcfg->pfc.mbc = ((buf[0] & ICE_IEEE_PFC_MBC_M) >> ICE_IEEE_PFC_MBC_S); - dcbcfg->pfc.pfccap = ((buf[0] & ICE_IEEE_PFC_CAP_M) >> - ICE_IEEE_PFC_CAP_S); + dcbcfg->pfc.willing = FIELD_GET(ICE_IEEE_PFC_WILLING_M, buf[0]); + dcbcfg->pfc.mbc = FIELD_GET(ICE_IEEE_PFC_MBC_M, buf[0]); + dcbcfg->pfc.pfccap = FIELD_GET(ICE_IEEE_PFC_CAP_M, buf[0]); dcbcfg->pfc.pfcena = buf[1]; } @@ -294,7 +286,7 @@ ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv, u8 *buf; typelen = ntohs(tlv->typelen); - len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S); + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); buf = tlv->tlvinfo; /* Removing sizeof(ouisubtype) and reserved byte from len. @@ -314,12 +306,10 @@ ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv, * ----------------------------------------- */ while (offset < len) { - dcbcfg->app[i].priority = ((buf[offset] & - ICE_IEEE_APP_PRIO_M) >> - ICE_IEEE_APP_PRIO_S); - dcbcfg->app[i].selector = ((buf[offset] & - ICE_IEEE_APP_SEL_M) >> - ICE_IEEE_APP_SEL_S); + dcbcfg->app[i].priority = FIELD_GET(ICE_IEEE_APP_PRIO_M, + buf[offset]); + dcbcfg->app[i].selector = FIELD_GET(ICE_IEEE_APP_SEL_M, + buf[offset]); dcbcfg->app[i].prot_id = (buf[offset + 1] << 0x8) | buf[offset + 2]; /* Move to next app */ @@ -347,8 +337,7 @@ ice_parse_ieee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) u8 subtype; ouisubtype = ntohl(tlv->ouisubtype); - subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >> - ICE_LLDP_TLV_SUBTYPE_S); + subtype = FIELD_GET(ICE_LLDP_TLV_SUBTYPE_M, ouisubtype); switch (subtype) { case ICE_IEEE_SUBTYPE_ETS_CFG: ice_parse_ieee_etscfg_tlv(tlv, dcbcfg); @@ -399,11 +388,9 @@ ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv, */ for (i = 0; i < 4; i++) { etscfg->prio_table[i * 2] = - ((buf[offset] & ICE_CEE_PGID_PRIO_1_M) >> - ICE_CEE_PGID_PRIO_1_S); + FIELD_GET(ICE_CEE_PGID_PRIO_1_M, buf[offset]); etscfg->prio_table[i * 2 + 1] = - ((buf[offset] & ICE_CEE_PGID_PRIO_0_M) >> - ICE_CEE_PGID_PRIO_0_S); + FIELD_GET(ICE_CEE_PGID_PRIO_0_M, buf[offset]); offset++; } @@ -466,7 +453,7 @@ ice_parse_cee_app_tlv(struct ice_cee_feat_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) u8 i; typelen = ntohs(tlv->hdr.typelen); - len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S); + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); dcbcfg->numapps = len / sizeof(*app); if (!dcbcfg->numapps) @@ -521,14 +508,13 @@ ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) u32 ouisubtype; ouisubtype = ntohl(tlv->ouisubtype); - subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >> - ICE_LLDP_TLV_SUBTYPE_S); + subtype = FIELD_GET(ICE_LLDP_TLV_SUBTYPE_M, ouisubtype); /* Return if not CEE DCBX */ if (subtype != ICE_CEE_DCBX_TYPE) return; typelen = ntohs(tlv->typelen); - tlvlen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S); + tlvlen = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); len = sizeof(tlv->typelen) + sizeof(ouisubtype) + sizeof(struct ice_cee_ctrl_tlv); /* Return if no CEE DCBX Feature TLVs */ @@ -540,9 +526,8 @@ ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) u16 sublen; typelen = ntohs(sub_tlv->hdr.typelen); - sublen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S); - subtype = (u8)((typelen & ICE_LLDP_TLV_TYPE_M) >> - ICE_LLDP_TLV_TYPE_S); + sublen = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); + subtype = FIELD_GET(ICE_LLDP_TLV_TYPE_M, typelen); switch (subtype) { case ICE_CEE_SUBTYPE_PG_CFG: ice_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg); @@ -579,7 +564,7 @@ ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) u32 oui; ouisubtype = ntohl(tlv->ouisubtype); - oui = ((ouisubtype & ICE_LLDP_TLV_OUI_M) >> ICE_LLDP_TLV_OUI_S); + oui = FIELD_GET(ICE_LLDP_TLV_OUI_M, ouisubtype); switch (oui) { case ICE_IEEE_8021QAZ_OUI: ice_parse_ieee_tlv(tlv, dcbcfg); @@ -616,8 +601,8 @@ static int ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) tlv = (struct ice_lldp_org_tlv *)lldpmib; while (1) { typelen = ntohs(tlv->typelen); - type = ((typelen & ICE_LLDP_TLV_TYPE_M) >> ICE_LLDP_TLV_TYPE_S); - len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S); + type = FIELD_GET(ICE_LLDP_TLV_TYPE_M, typelen); + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); offset += sizeof(typelen) + len; /* END TLV or beyond LLDPDU size */ @@ -806,11 +791,11 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, */ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) { dcbcfg->etscfg.prio_table[i * 2] = - ((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_0_M) >> - ICE_CEE_PGID_PRIO_0_S); + FIELD_GET(ICE_CEE_PGID_PRIO_0_M, + cee_cfg->oper_prio_tc[i]); dcbcfg->etscfg.prio_table[i * 2 + 1] = - ((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_1_M) >> - ICE_CEE_PGID_PRIO_1_S); + FIELD_GET(ICE_CEE_PGID_PRIO_1_M, + cee_cfg->oper_prio_tc[i]); } ice_for_each_traffic_class(i) { @@ -982,7 +967,7 @@ void ice_get_dcb_cfg_from_mib_change(struct ice_port_info *pi, mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw; - change_type = FIELD_GET(ICE_AQ_LLDP_MIB_TYPE_M, mib->type); + change_type = FIELD_GET(ICE_AQ_LLDP_MIB_TYPE_M, mib->type); if (change_type == ICE_AQ_LLDP_MIB_REMOTE) dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg; @@ -1483,7 +1468,7 @@ ice_dcb_cfg_to_lldp(u8 *lldpmib, u16 *miblen, struct ice_dcbx_cfg *dcbcfg) while (1) { ice_add_dcb_tlv(tlv, dcbcfg, tlvid++); typelen = ntohs(tlv->typelen); - len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S; + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); if (len) offset += len + 2; /* END TLV or beyond LLDPDU size */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 850db8e0e6b0..6e20ee610022 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -934,7 +934,7 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, skb->priority != TC_PRIO_CONTROL) { first->vid &= ~VLAN_PRIO_MASK; /* Mask the lower 3 bits to set the 802.1p priority */ - first->vid |= (skb->priority << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; + first->vid |= FIELD_PREP(VLAN_PRIO_MASK, skb->priority); /* if this is not already set it means a VLAN 0 + priority needs * to be offloaded */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index e1fbc6de452d..6d50b90a7359 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -227,7 +227,7 @@ static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay) u32 val; val = rd32(hw, PRTDCB_GENC); - *delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S); + *delay = FIELD_GET(PRTDCB_GENC_PFCLDA_M, val); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index ca118bc37e44..9069725c71b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -358,8 +358,8 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX); } else { cd_cmd = ICE_TX_CTX_DESC_SWTCH_VSI << ICE_TXD_CTX_QW1_CMD_S; - dst_vsi = ((u64)dst->u.port_info.port_id << - ICE_TXD_CTX_QW1_VSI_S) & ICE_TXD_CTX_QW1_VSI_M; + dst_vsi = FIELD_PREP(ICE_TXD_CTX_QW1_VSI_M, + dst->u.port_info.port_id); off->cd_qw1 = cd_cmd | dst_vsi | ICE_TX_DESC_DTYPE_CTX; } } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 47ab37ba62d2..a19b06f18e40 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -129,7 +129,6 @@ static const struct ice_stats ice_gstrings_pf_stats[] = { ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize), ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber), ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error), - ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors), ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards), ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors), ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes), @@ -1847,14 +1846,14 @@ ice_phy_type_to_ethtool(struct net_device *netdev, linkmode_zero(ks->link_modes.supported); linkmode_zero(ks->link_modes.advertising); - for (i = 0; i < BITS_PER_TYPE(u64); i++) { + for (i = 0; i < ARRAY_SIZE(phy_type_low_lkup); i++) { if (phy_types_low & BIT_ULL(i)) ice_linkmode_set_bit(&phy_type_low_lkup[i], ks, req_speeds, advert_phy_type_lo, i); } - for (i = 0; i < BITS_PER_TYPE(u64); i++) { + for (i = 0; i < ARRAY_SIZE(phy_type_high_lkup); i++) { if (phy_types_high & BIT_ULL(i)) ice_linkmode_set_bit(&phy_type_high_lkup[i], ks, req_speeds, advert_phy_type_hi, diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 54e4536219aa..9a1a04f5f146 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -503,8 +503,7 @@ ice_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp, return -EINVAL; data->flex_word = value & ICE_USERDEF_FLEX_WORD_M; - data->flex_offset = (value & ICE_USERDEF_FLEX_OFFS_M) >> - ICE_USERDEF_FLEX_OFFS_S; + data->flex_offset = FIELD_GET(ICE_USERDEF_FLEX_OFFS_M, value); if (data->flex_offset > ICE_USERDEF_FLEX_MAX_OFFS_VAL) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index ae089d32ee9d..5840c3e04a5b 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -604,55 +604,32 @@ ice_set_fd_desc_val(struct ice_fd_fltr_desc_ctx *ctx, u64 qword; /* prep QW0 of FD filter programming desc */ - qword = ((u64)ctx->qindex << ICE_FXD_FLTR_QW0_QINDEX_S) & - ICE_FXD_FLTR_QW0_QINDEX_M; - qword |= ((u64)ctx->comp_q << ICE_FXD_FLTR_QW0_COMP_Q_S) & - ICE_FXD_FLTR_QW0_COMP_Q_M; - qword |= ((u64)ctx->comp_report << ICE_FXD_FLTR_QW0_COMP_REPORT_S) & - ICE_FXD_FLTR_QW0_COMP_REPORT_M; - qword |= ((u64)ctx->fd_space << ICE_FXD_FLTR_QW0_FD_SPACE_S) & - ICE_FXD_FLTR_QW0_FD_SPACE_M; - qword |= ((u64)ctx->cnt_index << ICE_FXD_FLTR_QW0_STAT_CNT_S) & - ICE_FXD_FLTR_QW0_STAT_CNT_M; - qword |= ((u64)ctx->cnt_ena << ICE_FXD_FLTR_QW0_STAT_ENA_S) & - ICE_FXD_FLTR_QW0_STAT_ENA_M; - qword |= ((u64)ctx->evict_ena << ICE_FXD_FLTR_QW0_EVICT_ENA_S) & - ICE_FXD_FLTR_QW0_EVICT_ENA_M; - qword |= ((u64)ctx->toq << ICE_FXD_FLTR_QW0_TO_Q_S) & - ICE_FXD_FLTR_QW0_TO_Q_M; - qword |= ((u64)ctx->toq_prio << ICE_FXD_FLTR_QW0_TO_Q_PRI_S) & - ICE_FXD_FLTR_QW0_TO_Q_PRI_M; - qword |= ((u64)ctx->dpu_recipe << ICE_FXD_FLTR_QW0_DPU_RECIPE_S) & - ICE_FXD_FLTR_QW0_DPU_RECIPE_M; - qword |= ((u64)ctx->drop << ICE_FXD_FLTR_QW0_DROP_S) & - ICE_FXD_FLTR_QW0_DROP_M; - qword |= ((u64)ctx->flex_prio << ICE_FXD_FLTR_QW0_FLEX_PRI_S) & - ICE_FXD_FLTR_QW0_FLEX_PRI_M; - qword |= ((u64)ctx->flex_mdid << ICE_FXD_FLTR_QW0_FLEX_MDID_S) & - ICE_FXD_FLTR_QW0_FLEX_MDID_M; - qword |= ((u64)ctx->flex_val << ICE_FXD_FLTR_QW0_FLEX_VAL_S) & - ICE_FXD_FLTR_QW0_FLEX_VAL_M; + qword = FIELD_PREP(ICE_FXD_FLTR_QW0_QINDEX_M, ctx->qindex); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_COMP_Q_M, ctx->comp_q); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_COMP_REPORT_M, ctx->comp_report); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_FD_SPACE_M, ctx->fd_space); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_STAT_CNT_M, ctx->cnt_index); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_STAT_ENA_M, ctx->cnt_ena); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_EVICT_ENA_M, ctx->evict_ena); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_TO_Q_M, ctx->toq); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_TO_Q_PRI_M, ctx->toq_prio); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_DPU_RECIPE_M, ctx->dpu_recipe); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_DROP_M, ctx->drop); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_FLEX_PRI_M, ctx->flex_prio); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_FLEX_MDID_M, ctx->flex_mdid); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW0_FLEX_VAL_M, ctx->flex_val); fdir_desc->qidx_compq_space_stat = cpu_to_le64(qword); /* prep QW1 of FD filter programming desc */ - qword = ((u64)ctx->dtype << ICE_FXD_FLTR_QW1_DTYPE_S) & - ICE_FXD_FLTR_QW1_DTYPE_M; - qword |= ((u64)ctx->pcmd << ICE_FXD_FLTR_QW1_PCMD_S) & - ICE_FXD_FLTR_QW1_PCMD_M; - qword |= ((u64)ctx->desc_prof_prio << ICE_FXD_FLTR_QW1_PROF_PRI_S) & - ICE_FXD_FLTR_QW1_PROF_PRI_M; - qword |= ((u64)ctx->desc_prof << ICE_FXD_FLTR_QW1_PROF_S) & - ICE_FXD_FLTR_QW1_PROF_M; - qword |= ((u64)ctx->fd_vsi << ICE_FXD_FLTR_QW1_FD_VSI_S) & - ICE_FXD_FLTR_QW1_FD_VSI_M; - qword |= ((u64)ctx->swap << ICE_FXD_FLTR_QW1_SWAP_S) & - ICE_FXD_FLTR_QW1_SWAP_M; - qword |= ((u64)ctx->fdid_prio << ICE_FXD_FLTR_QW1_FDID_PRI_S) & - ICE_FXD_FLTR_QW1_FDID_PRI_M; - qword |= ((u64)ctx->fdid_mdid << ICE_FXD_FLTR_QW1_FDID_MDID_S) & - ICE_FXD_FLTR_QW1_FDID_MDID_M; - qword |= ((u64)ctx->fdid << ICE_FXD_FLTR_QW1_FDID_S) & - ICE_FXD_FLTR_QW1_FDID_M; + qword = FIELD_PREP(ICE_FXD_FLTR_QW1_DTYPE_M, ctx->dtype); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_PCMD_M, ctx->pcmd); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_PROF_PRI_M, ctx->desc_prof_prio); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_PROF_M, ctx->desc_prof); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_FD_VSI_M, ctx->fd_vsi); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_SWAP_M, ctx->swap); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_FDID_PRI_M, ctx->fdid_prio); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_FDID_MDID_M, ctx->fdid_mdid); + qword |= FIELD_PREP(ICE_FXD_FLTR_QW1_FDID_M, ctx->fdid); fdir_desc->dtype_cmd_vsi_fdid = cpu_to_le64(qword); } diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 85c57ec315c4..20d5db88c99f 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1414,13 +1414,13 @@ ice_write_prof_mask_reg(struct ice_hw *hw, enum ice_block blk, u16 mask_idx, switch (blk) { case ICE_BLK_RSS: offset = GLQF_HMASK(mask_idx); - val = (idx << GLQF_HMASK_MSK_INDEX_S) & GLQF_HMASK_MSK_INDEX_M; - val |= (mask << GLQF_HMASK_MASK_S) & GLQF_HMASK_MASK_M; + val = FIELD_PREP(GLQF_HMASK_MSK_INDEX_M, idx); + val |= FIELD_PREP(GLQF_HMASK_MASK_M, mask); break; case ICE_BLK_FD: offset = GLQF_FDMASK(mask_idx); - val = (idx << GLQF_FDMASK_MSK_INDEX_S) & GLQF_FDMASK_MSK_INDEX_M; - val |= (mask << GLQF_FDMASK_MASK_S) & GLQF_FDMASK_MASK_M; + val = FIELD_PREP(GLQF_FDMASK_MSK_INDEX_M, idx); + val |= FIELD_PREP(GLQF_FDMASK_MASK_M, mask); break; default: ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index e126dbbd7b2c..cfac1d432c15 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -200,6 +200,8 @@ #define GLINT_VECT2FUNC_PF_NUM_M ICE_M(0x7, 12) #define GLINT_VECT2FUNC_IS_PF_S 16 #define GLINT_VECT2FUNC_IS_PF_M BIT(16) +#define PFINT_ALLOC 0x001D2600 +#define PFINT_ALLOC_FIRST ICE_M(0x7FF, 0) #define PFINT_FW_CTL 0x0016C800 #define PFINT_FW_CTL_MSIX_INDX_M ICE_M(0x7FF, 0) #define PFINT_FW_CTL_ITR_INDX_S 11 diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 280994ee5933..2a25323105e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -208,8 +208,7 @@ ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx, eth_hdr = s_rule->hdr_data; ice_fill_eth_hdr(eth_hdr); - act |= (vsi_num << ICE_SINGLE_ACT_VSI_ID_S) & - ICE_SINGLE_ACT_VSI_ID_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi_num); s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); s_rule->recipe_id = cpu_to_le16(recipe_id); @@ -754,9 +753,7 @@ ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add) s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI | ICE_SINGLE_ACT_LAN_ENABLE | ICE_SINGLE_ACT_VALID_BIT | - ((vsi->vsi_num << - ICE_SINGLE_ACT_VSI_ID_S) & - ICE_SINGLE_ACT_VSI_ID_M)); + FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi->vsi_num)); s_rule->hdr_len = cpu_to_le16(ICE_LAG_SRIOV_TRAIN_PKT_LEN); memcpy(s_rule->hdr_data, lacp_train_pkt, LACP_TRAIN_PKT_LEN); opc = ice_aqc_opc_add_sw_rules; @@ -1981,6 +1978,8 @@ int ice_init_lag(struct ice_pf *pf) int n, err; ice_lag_init_feature_support_flag(pf); + if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) + return 0; pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL); if (!pf->lag) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 711e4fb62cb7..9be724291ef8 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -974,9 +974,8 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) /* Traffic from VSI can be sent to LAN */ ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; /* allow all untagged/tagged packets by default on Tx */ - ctxt->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL & - ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >> - ICE_AQ_VSI_INNER_VLAN_TX_MODE_S); + ctxt->info.inner_vlan_flags = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_TX_MODE_M, + ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL); /* SVM - by default bits 3 and 4 in inner_vlan_flags are 0's which * results in legacy behavior (show VLAN, DEI, and UP) in descriptor. * @@ -984,15 +983,14 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) */ if (ice_is_dvm_ena(hw)) { ctxt->info.inner_vlan_flags |= - ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING); ctxt->info.outer_vlan_flags = - (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << - ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & - ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M; + FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M, + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL); ctxt->info.outer_vlan_flags |= - (ICE_AQ_VSI_OUTER_TAG_VLAN_8100 << - ICE_AQ_VSI_OUTER_TAG_TYPE_S) & - ICE_AQ_VSI_OUTER_TAG_TYPE_M; + FIELD_PREP(ICE_AQ_VSI_OUTER_TAG_TYPE_M, + ICE_AQ_VSI_OUTER_TAG_VLAN_8100); ctxt->info.outer_vlan_flags |= FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_EMODE_M, ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING); @@ -1071,10 +1069,8 @@ static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc; vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++; - qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & - ICE_AQ_VSI_TC_Q_OFFSET_M) | - ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & - ICE_AQ_VSI_TC_Q_NUM_M); + qmap = FIELD_PREP(ICE_AQ_VSI_TC_Q_OFFSET_M, offset); + qmap |= FIELD_PREP(ICE_AQ_VSI_TC_Q_NUM_M, pow); offset += num_rxq_per_tc; tx_count += num_txq_per_tc; ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); @@ -1157,18 +1153,14 @@ static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ctxt->info.max_fd_fltr_shared = cpu_to_le16(vsi->num_bfltr); /* default queue index within the VSI of the default FD */ - val = ((dflt_q << ICE_AQ_VSI_FD_DEF_Q_S) & - ICE_AQ_VSI_FD_DEF_Q_M); + val = FIELD_PREP(ICE_AQ_VSI_FD_DEF_Q_M, dflt_q); /* target queue or queue group to the FD filter */ - val |= ((dflt_q_group << ICE_AQ_VSI_FD_DEF_GRP_S) & - ICE_AQ_VSI_FD_DEF_GRP_M); + val |= FIELD_PREP(ICE_AQ_VSI_FD_DEF_GRP_M, dflt_q_group); ctxt->info.fd_def_q = cpu_to_le16(val); /* queue index on which FD filter completion is reported */ - val = ((report_q << ICE_AQ_VSI_FD_REPORT_Q_S) & - ICE_AQ_VSI_FD_REPORT_Q_M); + val = FIELD_PREP(ICE_AQ_VSI_FD_REPORT_Q_M, report_q); /* priority of the default qindex action */ - val |= ((dflt_q_prio << ICE_AQ_VSI_FD_DEF_PRIORITY_S) & - ICE_AQ_VSI_FD_DEF_PRIORITY_M); + val |= FIELD_PREP(ICE_AQ_VSI_FD_DEF_PRIORITY_M, dflt_q_prio); ctxt->info.fd_report_opt = cpu_to_le16(val); } @@ -1221,10 +1213,8 @@ ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix); pow = order_base_2(qcount); - qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & - ICE_AQ_VSI_TC_Q_OFFSET_M) | - ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & - ICE_AQ_VSI_TC_Q_NUM_M); + qmap = FIELD_PREP(ICE_AQ_VSI_TC_Q_OFFSET_M, offset); + qmap |= FIELD_PREP(ICE_AQ_VSI_TC_Q_NUM_M, pow); ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); @@ -1795,11 +1785,8 @@ ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, QRXFLXP_CNTXT_RXDID_PRIO_M | QRXFLXP_CNTXT_TS_M); - regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & - QRXFLXP_CNTXT_RXDID_IDX_M; - - regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) & - QRXFLXP_CNTXT_RXDID_PRIO_M; + regval |= FIELD_PREP(QRXFLXP_CNTXT_RXDID_IDX_M, rxdid); + regval |= FIELD_PREP(QRXFLXP_CNTXT_RXDID_PRIO_M, prio); if (ena_ts) /* Enable TimeSync on this queue */ @@ -2358,6 +2345,9 @@ static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) } else { max_txqs[i] = vsi->alloc_txq; } + + if (vsi->type == ICE_VSI_PF) + max_txqs[i] += vsi->num_xdp_txq; } dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc); @@ -2611,10 +2601,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; - if (vsi->agg_node) { - vsi->agg_node->valid = false; - vsi->agg_node->agg_id = 0; - } } /** @@ -3392,9 +3378,8 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1; pow = order_base_2(tc0_qcount); - qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & - ICE_AQ_VSI_TC_Q_OFFSET_M) | - ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); + qmap = FIELD_PREP(ICE_AQ_VSI_TC_Q_OFFSET_M, tc0_offset); + qmap |= FIELD_PREP(ICE_AQ_VSI_TC_Q_NUM_M, pow); ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b97d116650be..63a5fb701ada 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -980,7 +980,7 @@ static void ice_set_dflt_mib(struct ice_pf *pf) * Octets 13 - 20 are TSA values - leave as zeros */ buf[5] = 0x64; - len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S; + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); offset += len + 2; tlv = (struct ice_lldp_org_tlv *) ((char *)tlv + sizeof(tlv->typelen) + len); @@ -1014,7 +1014,7 @@ static void ice_set_dflt_mib(struct ice_pf *pf) /* Octet 1 left as all zeros - PFC disabled */ buf[0] = 0x08; - len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S; + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); offset += len + 2; if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL)) @@ -1771,14 +1771,10 @@ static void ice_handle_mdd_event(struct ice_pf *pf) /* find what triggered an MDD event */ reg = rd32(hw, GL_MDET_TX_PQM); if (reg & GL_MDET_TX_PQM_VALID_M) { - u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> - GL_MDET_TX_PQM_PF_NUM_S; - u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> - GL_MDET_TX_PQM_VF_NUM_S; - u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> - GL_MDET_TX_PQM_MAL_TYPE_S; - u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >> - GL_MDET_TX_PQM_QNUM_S); + u8 pf_num = FIELD_GET(GL_MDET_TX_PQM_PF_NUM_M, reg); + u16 vf_num = FIELD_GET(GL_MDET_TX_PQM_VF_NUM_M, reg); + u8 event = FIELD_GET(GL_MDET_TX_PQM_MAL_TYPE_M, reg); + u16 queue = FIELD_GET(GL_MDET_TX_PQM_QNUM_M, reg); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", @@ -1788,14 +1784,10 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw)); if (reg & GL_MDET_TX_TCLAN_VALID_M) { - u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> - GL_MDET_TX_TCLAN_PF_NUM_S; - u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> - GL_MDET_TX_TCLAN_VF_NUM_S; - u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> - GL_MDET_TX_TCLAN_MAL_TYPE_S; - u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >> - GL_MDET_TX_TCLAN_QNUM_S); + u8 pf_num = FIELD_GET(GL_MDET_TX_TCLAN_PF_NUM_M, reg); + u16 vf_num = FIELD_GET(GL_MDET_TX_TCLAN_VF_NUM_M, reg); + u8 event = FIELD_GET(GL_MDET_TX_TCLAN_MAL_TYPE_M, reg); + u16 queue = FIELD_GET(GL_MDET_TX_TCLAN_QNUM_M, reg); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", @@ -1805,14 +1797,10 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, GL_MDET_RX); if (reg & GL_MDET_RX_VALID_M) { - u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >> - GL_MDET_RX_PF_NUM_S; - u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> - GL_MDET_RX_VF_NUM_S; - u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >> - GL_MDET_RX_MAL_TYPE_S; - u16 queue = ((reg & GL_MDET_RX_QNUM_M) >> - GL_MDET_RX_QNUM_S); + u8 pf_num = FIELD_GET(GL_MDET_RX_PF_NUM_M, reg); + u16 vf_num = FIELD_GET(GL_MDET_RX_VF_NUM_M, reg); + u8 event = FIELD_GET(GL_MDET_RX_MAL_TYPE_M, reg); + u16 queue = FIELD_GET(GL_MDET_RX_QNUM_M, reg); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", @@ -3058,6 +3046,7 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) static void ice_ena_misc_vector(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; + u32 pf_intr_start_offset; u32 val; /* Disable anti-spoof detection interrupt to prevent spurious event @@ -3086,6 +3075,47 @@ static void ice_ena_misc_vector(struct ice_pf *pf) /* SW_ITR_IDX = 0, but don't change INTENA */ wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); + + if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + return; + pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST; + wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset), + GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); +} + +/** + * ice_ll_ts_intr - ll_ts interrupt handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) +{ + struct ice_pf *pf = data; + u32 pf_intr_start_offset; + struct ice_ptp_tx *tx; + unsigned long flags; + struct ice_hw *hw; + u32 val; + u8 idx; + + hw = &pf->hw; + tx = &pf->ptp.port.tx; + spin_lock_irqsave(&tx->lock, flags); + ice_ptp_complete_tx_single_tstamp(tx); + + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + spin_unlock_irqrestore(&tx->lock, flags); + + val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | + (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); + pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST; + wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset), + val); + + return IRQ_HANDLED; } /** @@ -3096,6 +3126,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) { struct ice_pf *pf = (struct ice_pf *)data; + irqreturn_t ret = IRQ_HANDLED; struct ice_hw *hw = &pf->hw; struct device *dev; u32 oicr, ena_mask; @@ -3135,8 +3166,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) /* we have a reset warning */ ena_mask &= ~PFINT_OICR_GRST_M; - reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> - GLGEN_RSTAT_RESET_TYPE_S; + reset = FIELD_GET(GLGEN_RSTAT_RESET_TYPE_M, + rd32(hw, GLGEN_RSTAT)); if (reset == ICE_RESET_CORER) pf->corer_count++; @@ -3177,8 +3208,22 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (ice_ptp_pf_handles_tx_interrupt(pf)) + if (ice_pf_state_is_nominal(pf) && + pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) { + struct ice_ptp_tx *tx = &pf->ptp.port.tx; + unsigned long flags; + u8 idx; + + spin_lock_irqsave(&tx->lock, flags); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + spin_unlock_irqrestore(&tx->lock, flags); + } else if (ice_ptp_pf_handles_tx_interrupt(pf)) { set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); + ret = IRQ_WAKE_THREAD; + } } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -3194,7 +3239,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) GLTSYN_STAT_EVENT1_M | GLTSYN_STAT_EVENT2_M); - set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread); + ice_ptp_extts_event(pf); } } @@ -3217,8 +3262,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) set_bit(ICE_PFR_REQ, pf->state); } } + ice_service_task_schedule(pf); + if (ret == IRQ_HANDLED) + ice_irq_dynamic_ena(hw, NULL, NULL); - return IRQ_WAKE_THREAD; + return ret; } /** @@ -3234,12 +3282,7 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) hw = &pf->hw; if (ice_is_reset_in_progress(pf->state)) - return IRQ_HANDLED; - - ice_service_task_schedule(pf); - - if (test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread)) - ice_ptp_extts_event(pf); + goto skip_irq; if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { /* Process outstanding Tx timestamps. If there is more work, @@ -3251,6 +3294,7 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) } } +skip_irq: ice_irq_dynamic_ena(hw, NULL, NULL); return IRQ_HANDLED; @@ -3281,6 +3325,20 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) } /** + * ice_free_irq_msix_ll_ts- Unroll ll_ts vector setup + * @pf: board private structure + */ +static void ice_free_irq_msix_ll_ts(struct ice_pf *pf) +{ + int irq_num = pf->ll_ts_irq.virq; + + synchronize_irq(irq_num); + devm_free_irq(ice_pf_to_dev(pf), irq_num, pf); + + ice_free_irq(pf, pf->ll_ts_irq); +} + +/** * ice_free_irq_msix_misc - Unroll misc vector setup * @pf: board private structure */ @@ -3299,6 +3357,8 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); ice_free_irq(pf, pf->oicr_irq); + if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + ice_free_irq_msix_ll_ts(pf); } /** @@ -3324,10 +3384,12 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx) PFINT_MBX_CTL_CAUSE_ENA_M); wr32(hw, PFINT_MBX_CTL, val); - /* This enables Sideband queue Interrupt causes */ - val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | - PFINT_SB_CTL_CAUSE_ENA_M); - wr32(hw, PFINT_SB_CTL, val); + if (!hw->dev_caps.ts_dev_info.ts_ll_int_read) { + /* enable Sideband queue Interrupt causes */ + val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | + PFINT_SB_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_SB_CTL, val); + } ice_flush(hw); } @@ -3344,13 +3406,17 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - struct msi_map oicr_irq; + u32 pf_intr_start_offset; + struct msi_map irq; int err = 0; if (!pf->int_name[0]) snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", dev_driver_string(dev), dev_name(dev)); + if (!pf->int_name_ll_ts[0]) + snprintf(pf->int_name_ll_ts, sizeof(pf->int_name_ll_ts) - 1, + "%s-%s:ll_ts", dev_driver_string(dev), dev_name(dev)); /* Do not request IRQ but do enable OICR interrupt since settings are * lost during reset. Note that this function is called only during * rebuild path and not while reset is in progress. @@ -3359,11 +3425,11 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) goto skip_req_irq; /* reserve one vector in irq_tracker for misc interrupts */ - oicr_irq = ice_alloc_irq(pf, false); - if (oicr_irq.index < 0) - return oicr_irq.index; + irq = ice_alloc_irq(pf, false); + if (irq.index < 0) + return irq.index; - pf->oicr_irq = oicr_irq; + pf->oicr_irq = irq; err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr, ice_misc_intr_thread_fn, 0, pf->int_name, pf); @@ -3374,10 +3440,34 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) return err; } + /* reserve one vector in irq_tracker for ll_ts interrupt */ + if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + goto skip_req_irq; + + irq = ice_alloc_irq(pf, false); + if (irq.index < 0) + return irq.index; + + pf->ll_ts_irq = irq; + err = devm_request_irq(dev, pf->ll_ts_irq.virq, ice_ll_ts_intr, 0, + pf->int_name_ll_ts, pf); + if (err) { + dev_err(dev, "devm_request_irq for %s failed: %d\n", + pf->int_name_ll_ts, err); + ice_free_irq(pf, pf->ll_ts_irq); + return err; + } + skip_req_irq: ice_ena_misc_vector(pf); ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index); + /* This enables LL TS interrupt */ + pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST; + if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + wr32(hw, PFINT_SB_CTL, + ((pf->ll_ts_irq.index + pf_intr_start_offset) & + PFINT_SB_CTL_MSIX_INDX_M) | PFINT_SB_CTL_CAUSE_ENA_M); wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index), ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); @@ -6744,13 +6834,11 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) cur_ns->rx_crc_errors = pf->stats.crc_errors; cur_ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes + - pf->stats.rx_len_errors + pf->stats.rx_undersize + pf->hw_csum_rx_error + pf->stats.rx_jabber + pf->stats.rx_fragments + pf->stats.rx_oversize; - cur_ns->rx_length_errors = pf->stats.rx_len_errors; /* record drops from the port level */ cur_ns->rx_missed_errors = pf->stats.eth.rx_discards; } @@ -6890,9 +6978,6 @@ void ice_update_pf_stats(struct ice_pf *pf) &prev_ps->mac_remote_faults, &cur_ps->mac_remote_faults); - ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, - &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); - ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, &prev_ps->rx_undersize, &cur_ps->rx_undersize); @@ -8016,8 +8101,8 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct ice_hw *hw = &pf->hw; u32 head, val = 0; - head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) & - QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S; + head = FIELD_GET(QTX_COMM_HEAD_HEAD_M, + rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue]))); /* Read interrupt register */ val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index f6f52a248066..d4e05d2cb30c 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -571,8 +571,8 @@ ice_get_nvm_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_nv return status; } - nvm->major = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; - nvm->minor = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; + nvm->major = FIELD_GET(ICE_NVM_VER_HI_MASK, ver); + nvm->minor = FIELD_GET(ICE_NVM_VER_LO_MASK, ver); status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); if (status) { @@ -706,9 +706,9 @@ ice_get_orom_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_o combo_ver = le32_to_cpu(civd.combo_ver); - orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >> ICE_OROM_VER_SHIFT); - orom->patch = (u8)(combo_ver & ICE_OROM_VER_PATCH_MASK); - orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >> ICE_OROM_VER_BUILD_SHIFT); + orom->major = FIELD_GET(ICE_OROM_VER_MASK, combo_ver); + orom->patch = FIELD_GET(ICE_OROM_VER_PATCH_MASK, combo_ver); + orom->build = FIELD_GET(ICE_OROM_VER_BUILD_MASK, combo_ver); return 0; } @@ -950,7 +950,8 @@ static int ice_determine_active_flash_banks(struct ice_hw *hw) } /* Check that the control word indicates validity */ - if ((ctrl_word & ICE_SR_CTRL_WORD_1_M) >> ICE_SR_CTRL_WORD_1_S != ICE_SR_CTRL_WORD_VALID) { + if (FIELD_GET(ICE_SR_CTRL_WORD_1_M, ctrl_word) != + ICE_SR_CTRL_WORD_VALID) { ice_debug(hw, ICE_DBG_NVM, "Shadow RAM control word is invalid\n"); return -EIO; } @@ -1027,7 +1028,7 @@ int ice_init_nvm(struct ice_hw *hw) * as the blank mode may be used in the factory line. */ gens_stat = rd32(hw, GLNVM_GENS); - sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S; + sr_size = FIELD_GET(GLNVM_GENS_SR_SIZE_M, gens_stat); /* Switching to words (sr_size contains power of 2) */ flash->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 239cf8a2ee80..3b6605c8585e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -525,6 +525,119 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) } /** + * ice_ptp_req_tx_single_tstamp - Request Tx timestamp for a port from FW + * @tx: the PTP Tx timestamp tracker + * @idx: index of the timestamp to request + */ +void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx) +{ + struct ice_ptp_port *ptp_port; + struct sk_buff *skb; + struct ice_pf *pf; + + if (!tx->init) + return; + + ptp_port = container_of(tx, struct ice_ptp_port, tx); + pf = ptp_port_to_pf(ptp_port); + + /* Drop packets which have waited for more than 2 seconds */ + if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) { + /* Count the number of Tx timestamps that timed out */ + pf->ptp.tx_hwtstamp_timeouts++; + + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; + clear_bit(idx, tx->in_use); + + dev_kfree_skb_any(skb); + return; + } + + ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); + + /* Write TS index to read to the PF register so the FW can read it */ + wr32(&pf->hw, PF_SB_ATQBAL, + TS_LL_READ_TS_INTR | FIELD_PREP(TS_LL_READ_TS_IDX, idx) | + TS_LL_READ_TS); + tx->last_ll_ts_idx_read = idx; +} + +/** + * ice_ptp_complete_tx_single_tstamp - Complete Tx timestamp for a port + * @tx: the PTP Tx timestamp tracker + */ +void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx) +{ + struct skb_shared_hwtstamps shhwtstamps = {}; + u8 idx = tx->last_ll_ts_idx_read; + struct ice_ptp_port *ptp_port; + u64 raw_tstamp, tstamp; + bool drop_ts = false; + struct sk_buff *skb; + struct ice_pf *pf; + u32 val; + + if (!tx->init || tx->last_ll_ts_idx_read < 0) + return; + + ptp_port = container_of(tx, struct ice_ptp_port, tx); + pf = ptp_port_to_pf(ptp_port); + + ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); + + val = rd32(&pf->hw, PF_SB_ATQBAL); + + /* When the bit is cleared, the TS is ready in the register */ + if (val & TS_LL_READ_TS) { + dev_err(ice_pf_to_dev(pf), "Failed to get the Tx tstamp - FW not ready"); + return; + } + + /* High 8 bit value of the TS is on the bits 16:23 */ + raw_tstamp = FIELD_GET(TS_LL_READ_TS_HIGH, val); + raw_tstamp <<= 32; + + /* Read the low 32 bit value */ + raw_tstamp |= (u64)rd32(&pf->hw, PF_SB_ATQBAH); + + /* For PHYs which don't implement a proper timestamp ready bitmap, + * verify that the timestamp value is different from the last cached + * timestamp. If it is not, skip this for now assuming it hasn't yet + * been captured by hardware. + */ + if (!drop_ts && tx->verify_cached && + raw_tstamp == tx->tstamps[idx].cached_tstamp) + return; + + if (tx->verify_cached && raw_tstamp) + tx->tstamps[idx].cached_tstamp = raw_tstamp; + clear_bit(idx, tx->in_use); + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; + if (test_and_clear_bit(idx, tx->stale)) + drop_ts = true; + + if (!skb) + return; + + if (drop_ts) { + dev_kfree_skb_any(skb); + return; + } + + /* Extend the timestamp using cached PHC time */ + tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); + if (tstamp) { + shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + ice_trace(tx_tstamp_complete, skb, idx); + } + + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); +} + +/** * ice_ptp_process_tx_tstamp - Process Tx timestamps for a port * @tx: the PTP Tx timestamp tracker * @@ -575,6 +688,7 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; + unsigned long flags; struct ice_pf *pf; struct ice_hw *hw; u64 tstamp_ready; @@ -646,7 +760,7 @@ static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) drop_ts = true; skip_ts_read: - spin_lock(&tx->lock); + spin_lock_irqsave(&tx->lock, flags); if (tx->verify_cached && raw_tstamp) tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); @@ -654,7 +768,7 @@ skip_ts_read: tx->tstamps[idx].skb = NULL; if (test_and_clear_bit(idx, tx->stale)) drop_ts = true; - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); /* It is unlikely but possible that the SKB will have been * flushed at this point due to link change or teardown. @@ -724,6 +838,7 @@ static enum ice_tx_tstamp_work ice_ptp_tx_tstamp_owner(struct ice_pf *pf) static enum ice_tx_tstamp_work ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) { bool more_timestamps; + unsigned long flags; if (!tx->init) return ICE_TX_TSTAMP_WORK_DONE; @@ -732,9 +847,9 @@ static enum ice_tx_tstamp_work ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) ice_ptp_process_tx_tstamp(tx); /* Check if there are outstanding Tx timestamps */ - spin_lock(&tx->lock); + spin_lock_irqsave(&tx->lock, flags); more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); if (more_timestamps) return ICE_TX_TSTAMP_WORK_PENDING; @@ -771,6 +886,7 @@ ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) tx->in_use = in_use; tx->stale = stale; tx->init = 1; + tx->last_ll_ts_idx_read = -1; spin_lock_init(&tx->lock); @@ -788,6 +904,7 @@ static void ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { struct ice_hw *hw = &pf->hw; + unsigned long flags; u64 tstamp_ready; int err; u8 idx; @@ -811,12 +928,12 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx))) ice_clear_phy_tstamp(hw, tx->block, phy_idx); - spin_lock(&tx->lock); + spin_lock_irqsave(&tx->lock, flags); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; clear_bit(idx, tx->in_use); clear_bit(idx, tx->stale); - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); /* Count the number of Tx timestamps flushed */ pf->ptp.tx_hwtstamp_flushed++; @@ -840,9 +957,11 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static void ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx) { - spin_lock(&tx->lock); + unsigned long flags; + + spin_lock_irqsave(&tx->lock, flags); bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len); - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); } /** @@ -855,9 +974,11 @@ ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx) static void ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { - spin_lock(&tx->lock); + unsigned long flags; + + spin_lock_irqsave(&tx->lock, flags); tx->init = 0; - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); /* wait for potentially outstanding interrupt to complete */ synchronize_irq(pf->oicr_irq.virq); @@ -1140,9 +1261,9 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) } if (offs & 0x1) - phy_sts = (val & Q_REG_FIFO13_M) >> Q_REG_FIFO13_S; + phy_sts = FIELD_GET(Q_REG_FIFO13_M, val); else - phy_sts = (val & Q_REG_FIFO02_M) >> Q_REG_FIFO02_S; + phy_sts = FIELD_GET(Q_REG_FIFO02_M, val); if (phy_sts & FIFO_EMPTY) { port->tx_fifo_busy_cnt = FIFO_OK; @@ -1257,6 +1378,7 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) struct ice_pf *pf = ptp_port_to_pf(ptp_port); u8 port = ptp_port->port_num; struct ice_hw *hw = &pf->hw; + unsigned long flags; int err; if (ice_is_e810(hw)) @@ -1270,9 +1392,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_cancel_delayed_work_sync(&ptp_port->ov_work); /* temporarily disable Tx timestamps while calibrating PHY offset */ - spin_lock(&ptp_port->tx.lock); + spin_lock_irqsave(&ptp_port->tx.lock, flags); ptp_port->tx.calibrating = true; - spin_unlock(&ptp_port->tx.lock); + spin_unlock_irqrestore(&ptp_port->tx.lock, flags); ptp_port->tx_fifo_busy_cnt = 0; /* Start the PHY timer in Vernier mode */ @@ -1281,9 +1403,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) goto out_unlock; /* Enable Tx timestamps right away */ - spin_lock(&ptp_port->tx.lock); + spin_lock_irqsave(&ptp_port->tx.lock, flags); ptp_port->tx.calibrating = false; - spin_unlock(&ptp_port->tx.lock); + spin_unlock_irqrestore(&ptp_port->tx.lock, flags); kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); @@ -1359,8 +1481,8 @@ static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold) if (ena) { val |= Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M; val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_THR_M; - val |= ((threshold << Q_REG_TX_MEM_GBL_CFG_INTR_THR_S) & - Q_REG_TX_MEM_GBL_CFG_INTR_THR_M); + val |= FIELD_PREP(Q_REG_TX_MEM_GBL_CFG_INTR_THR_M, + threshold); } else { val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M; } @@ -1505,8 +1627,7 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, * + num_in_channels * tmr_idx */ func = 1 + chan + (tmr_idx * 3); - gpio_reg = ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & - GLGEN_GPIO_CTL_PIN_FUNC_M); + gpio_reg = FIELD_PREP(GLGEN_GPIO_CTL_PIN_FUNC_M, func); pf->ptp.ext_ts_chan |= (1 << chan); } else { /* clear the values we set to reset defaults */ @@ -1616,7 +1737,7 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, /* 4. write GPIO CTL reg */ func = 8 + chan + (tmr_idx * 4); val = GLGEN_GPIO_CTL_PIN_DIR_M | - ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & GLGEN_GPIO_CTL_PIN_FUNC_M); + FIELD_PREP(GLGEN_GPIO_CTL_PIN_FUNC_M, func); wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val); /* Store the value if requested */ @@ -2374,18 +2495,23 @@ static long ice_ptp_create_clock(struct ice_pf *pf) */ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) { + unsigned long flags; u8 idx; - spin_lock(&tx->lock); + spin_lock_irqsave(&tx->lock, flags); /* Check that this tracker is accepting new timestamp requests */ if (!ice_ptp_is_tx_tracker_up(tx)) { - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); return -1; } /* Find and set the first available index */ - idx = find_first_zero_bit(tx->in_use, tx->len); + idx = find_next_zero_bit(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx == tx->len) + idx = find_first_zero_bit(tx->in_use, tx->len); + if (idx < tx->len) { /* We got a valid index that no other thread could have set. Store * a reference to the skb and the start time to allow discarding old @@ -2399,7 +2525,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) ice_trace(tx_tstamp_request, skb, idx); } - spin_unlock(&tx->lock); + spin_unlock_irqrestore(&tx->lock, flags); /* return the appropriate PHY timestamp register index, -1 if no * indexes were available. @@ -2737,6 +2863,8 @@ static int ice_ptp_register_auxbus_driver(struct ice_pf *pf) name = devm_kasprintf(dev, GFP_KERNEL, "ptp_aux_dev_%u_%u_clk%u", pf->pdev->bus->number, PCI_SLOT(pf->pdev->devfn), ice_get_ptp_src_clock_index(&pf->hw)); + if (!name) + return -ENOMEM; aux_driver->name = name; aux_driver->shutdown = ice_ptp_auxbus_shutdown; @@ -2983,6 +3111,8 @@ static int ice_ptp_create_auxbus_device(struct ice_pf *pf) name = devm_kasprintf(dev, GFP_KERNEL, "ptp_aux_dev_%u_%u_clk%u", pf->pdev->bus->number, PCI_SLOT(pf->pdev->devfn), ice_get_ptp_src_clock_index(&pf->hw)); + if (!name) + return -ENOMEM; aux_dev->name = name; aux_dev->id = id; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 032653a7a133..087dd32d8762 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -131,6 +131,7 @@ enum ice_tx_tstamp_work { * @calibrating: if true, the PHY is calibrating the Tx offset. During this * window, timestamps are temporarily disabled. * @verify_cached: if true, verify new timestamp differs from last read value + * @last_ll_ts_idx_read: index of the last LL TS read by the FW */ struct ice_ptp_tx { spinlock_t lock; /* lock protecting in_use bitmap */ @@ -143,6 +144,7 @@ struct ice_ptp_tx { u8 init : 1; u8 calibrating : 1; u8 verify_cached : 1; + s8 last_ll_ts_idx_read; }; /* Quad and port information for initializing timestamp blocks */ @@ -296,6 +298,8 @@ void ice_ptp_restore_timestamp_mode(struct ice_pf *pf); void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); +void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx); +void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx); enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, @@ -324,6 +328,11 @@ ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) return -1; } +static inline void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx) +{ } + +static inline void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx) { } + static inline bool ice_ptp_process_ts(struct ice_pf *pf) { return true; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index e976138e934a..1f3e03124430 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -509,6 +509,7 @@ int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id, #define TS_LL_READ_RETRIES 200 #define TS_LL_READ_TS_HIGH GENMASK(23, 16) #define TS_LL_READ_TS_IDX GENMASK(29, 24) +#define TS_LL_READ_TS_INTR BIT(30) #define TS_LL_READ_TS BIT(31) /* Internal PHY timestamp address */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2f4a621254e8..d174a4eeb899 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1387,8 +1387,7 @@ void ice_sched_get_psm_clk_freq(struct ice_hw *hw) u32 val, clk_src; val = rd32(hw, GLGEN_CLKSTAT_SRC); - clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >> - GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S; + clk_src = FIELD_GET(GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M, val); #define PSM_CLK_SRC_367_MHZ 0x0 #define PSM_CLK_SRC_416_MHZ 0x1 diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 6d33dd647c78..a94a1c48c3de 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -106,10 +106,8 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) for (v = first; v <= last; v++) { u32 reg; - reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) & - GLINT_VECT2FUNC_IS_PF_M) | - ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & - GLINT_VECT2FUNC_PF_NUM_M)); + reg = FIELD_PREP(GLINT_VECT2FUNC_IS_PF_M, 1) | + FIELD_PREP(GLINT_VECT2FUNC_PF_NUM_M, hw->pf_id); wr32(hw, GLINT_VECT2FUNC(v), reg); } @@ -275,24 +273,20 @@ static void ice_ena_vf_msix_mappings(struct ice_vf *vf) (device_based_first_msix + vf->num_msix) - 1; device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id; - reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) & - VPINT_ALLOC_FIRST_M) | - ((device_based_last_msix << VPINT_ALLOC_LAST_S) & - VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M); + reg = FIELD_PREP(VPINT_ALLOC_FIRST_M, device_based_first_msix) | + FIELD_PREP(VPINT_ALLOC_LAST_M, device_based_last_msix) | + VPINT_ALLOC_VALID_M; wr32(hw, VPINT_ALLOC(vf->vf_id), reg); - reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S) - & VPINT_ALLOC_PCI_FIRST_M) | - ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) & - VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M); + reg = FIELD_PREP(VPINT_ALLOC_PCI_FIRST_M, device_based_first_msix) | + FIELD_PREP(VPINT_ALLOC_PCI_LAST_M, device_based_last_msix) | + VPINT_ALLOC_PCI_VALID_M; wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg); /* map the interrupts to its functions */ for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) { - reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) & - GLINT_VECT2FUNC_VF_NUM_M) | - ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & - GLINT_VECT2FUNC_PF_NUM_M)); + reg = FIELD_PREP(GLINT_VECT2FUNC_VF_NUM_M, device_based_vf_id) | + FIELD_PREP(GLINT_VECT2FUNC_PF_NUM_M, hw->pf_id); wr32(hw, GLINT_VECT2FUNC(v), reg); } @@ -325,10 +319,8 @@ static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq) * VFNUMQ value should be set to (number of queues - 1). A value * of 0 means 1 queue and a value of 255 means 256 queues */ - reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) & - VPLAN_TX_QBASE_VFFIRSTQ_M) | - (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) & - VPLAN_TX_QBASE_VFNUMQ_M)); + reg = FIELD_PREP(VPLAN_TX_QBASE_VFFIRSTQ_M, vsi->txq_map[0]) | + FIELD_PREP(VPLAN_TX_QBASE_VFNUMQ_M, max_txq - 1); wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg); } else { dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); @@ -343,10 +335,8 @@ static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq) * VFNUMQ value should be set to (number of queues - 1). A value * of 0 means 1 queue and a value of 255 means 256 queues */ - reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) & - VPLAN_RX_QBASE_VFFIRSTQ_M) | - (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) & - VPLAN_RX_QBASE_VFNUMQ_M)); + reg = FIELD_PREP(VPLAN_RX_QBASE_VFFIRSTQ_M, vsi->rxq_map[0]) | + FIELD_PREP(VPLAN_RX_QBASE_VFNUMQ_M, max_rxq - 1); wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg); } else { dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); @@ -772,24 +762,6 @@ static void ice_sriov_clear_reset_trigger(struct ice_vf *vf) } /** - * ice_sriov_create_vsi - Create a new VSI for a VF - * @vf: VF to create the VSI for - * - * This is called by ice_vf_recreate_vsi to create the new VSI after the old - * VSI has been released. - */ -static int ice_sriov_create_vsi(struct ice_vf *vf) -{ - struct ice_vsi *vsi; - - vsi = ice_vf_vsi_setup(vf); - if (!vsi) - return -ENOMEM; - - return 0; -} - -/** * ice_sriov_post_vsi_rebuild - tasks to do after the VF's VSI have been rebuilt * @vf: VF to perform tasks on */ @@ -808,7 +780,6 @@ static const struct ice_vf_ops ice_sriov_vf_ops = { .poll_reset_status = ice_sriov_poll_reset_status, .clear_reset_trigger = ice_sriov_clear_reset_trigger, .irq_close = NULL, - .create_vsi = ice_sriov_create_vsi, .post_vsi_rebuild = ice_sriov_post_vsi_rebuild, }; @@ -1151,8 +1122,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (vf->first_vector_idx < 0) goto unroll; - ice_vf_vsi_release(vf); - if (vf->vf_ops->create_vsi(vf)) { + if (ice_vf_reconfig_vsi(vf)) { /* Try to rebuild with previous values */ needs_rebuild = true; goto unroll; @@ -1179,7 +1149,7 @@ unroll: return -EINVAL; if (needs_rebuild) - vf->vf_ops->create_vsi(vf); + ice_vf_reconfig_vsi(vf); ice_ena_vf_mappings(vf); ice_put_vf(vf); @@ -1328,8 +1298,7 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", gldcb_rtctq); /* event returns device global Rx queue number */ - queue = (gldcb_rtctq & GLDCB_RTCTQ_RXQNUM_M) >> - GLDCB_RTCTQ_RXQNUM_S; + queue = FIELD_GET(GLDCB_RTCTQ_RXQNUM_M, gldcb_rtctq); vf = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, queue)); if (!vf) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index ee19f3aa3d19..f84bab80ca42 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2492,25 +2492,24 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, switch (f_info->fltr_act) { case ICE_FWD_TO_VSI: - act |= (f_info->fwd_id.hw_vsi_id << ICE_SINGLE_ACT_VSI_ID_S) & - ICE_SINGLE_ACT_VSI_ID_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, + f_info->fwd_id.hw_vsi_id); if (f_info->lkup_type != ICE_SW_LKUP_VLAN) act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; break; case ICE_FWD_TO_VSI_LIST: act |= ICE_SINGLE_ACT_VSI_LIST; - act |= (f_info->fwd_id.vsi_list_id << - ICE_SINGLE_ACT_VSI_LIST_ID_S) & - ICE_SINGLE_ACT_VSI_LIST_ID_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_LIST_ID_M, + f_info->fwd_id.vsi_list_id); if (f_info->lkup_type != ICE_SW_LKUP_VLAN) act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; break; case ICE_FWD_TO_Q: act |= ICE_SINGLE_ACT_TO_Q; - act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & - ICE_SINGLE_ACT_Q_INDEX_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_Q_INDEX_M, + f_info->fwd_id.q_id); break; case ICE_DROP_PACKET: act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP | @@ -2520,10 +2519,9 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, q_rgn = f_info->qgrp_size > 0 ? (u8)ilog2(f_info->qgrp_size) : 0; act |= ICE_SINGLE_ACT_TO_Q; - act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & - ICE_SINGLE_ACT_Q_INDEX_M; - act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) & - ICE_SINGLE_ACT_Q_REGION_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_Q_INDEX_M, + f_info->fwd_id.q_id); + act |= FIELD_PREP(ICE_SINGLE_ACT_Q_REGION_M, q_rgn); break; default: return; @@ -2649,7 +2647,7 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, m_ent->fltr_info.fwd_id.hw_vsi_id; act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT; - act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M; + act |= FIELD_PREP(ICE_LG_ACT_VSI_LIST_ID_M, id); if (m_ent->vsi_count > 1) act |= ICE_LG_ACT_VSI_LIST; lg_act->act[0] = cpu_to_le32(act); @@ -2657,16 +2655,15 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, /* Second action descriptor type */ act = ICE_LG_ACT_GENERIC; - act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; + act |= FIELD_PREP(ICE_LG_ACT_GENERIC_VALUE_M, 1); lg_act->act[1] = cpu_to_le32(act); - act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << - ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; + act = FIELD_PREP(ICE_LG_ACT_GENERIC_OFFSET_M, + ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX); /* Third action Marker value */ act |= ICE_LG_ACT_GENERIC; - act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & - ICE_LG_ACT_GENERIC_VALUE_M; + act |= FIELD_PREP(ICE_LG_ACT_GENERIC_VALUE_M, sw_marker); lg_act->act[2] = cpu_to_le32(act); @@ -2675,9 +2672,9 @@ ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, ice_aqc_opc_update_sw_rules); /* Update the action to point to the large action ID */ - rx_tx->act = cpu_to_le32(ICE_SINGLE_ACT_PTR | - ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) & - ICE_SINGLE_ACT_PTR_VAL_M)); + act = ICE_SINGLE_ACT_PTR; + act |= FIELD_PREP(ICE_SINGLE_ACT_PTR_VAL_M, l_id); + rx_tx->act = cpu_to_le32(act); /* Use the filter rule ID of the previously created rule with single * act. Once the update happens, hardware will treat this as large @@ -4426,8 +4423,8 @@ ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, int status; buf->num_elems = cpu_to_le16(num_items); - buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) & - ICE_AQC_RES_TYPE_M) | alloc_shared); + buf->res_type = cpu_to_le16(FIELD_PREP(ICE_AQC_RES_TYPE_M, type) | + alloc_shared); status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_alloc_res); if (status) @@ -4454,8 +4451,8 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, int status; buf->num_elems = cpu_to_le16(num_items); - buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) & - ICE_AQC_RES_TYPE_M) | alloc_shared); + buf->res_type = cpu_to_le16(FIELD_PREP(ICE_AQC_RES_TYPE_M, type) | + alloc_shared); buf->elem[0].e.sw_resp = cpu_to_le16(counter_id); status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_free_res); @@ -4481,18 +4478,15 @@ int ice_share_res(struct ice_hw *hw, u16 type, u8 shared, u16 res_id) { DEFINE_FLEX(struct ice_aqc_alloc_free_res_elem, buf, elem, 1); u16 buf_len = __struct_size(buf); + u16 res_type; int status; buf->num_elems = cpu_to_le16(1); + res_type = FIELD_PREP(ICE_AQC_RES_TYPE_M, type); if (shared) - buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) & - ICE_AQC_RES_TYPE_M) | - ICE_AQC_RES_TYPE_FLAG_SHARED); - else - buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) & - ICE_AQC_RES_TYPE_M) & - ~ICE_AQC_RES_TYPE_FLAG_SHARED); + res_type |= ICE_AQC_RES_TYPE_FLAG_SHARED; + buf->res_type = cpu_to_le16(res_type); buf->elem[0].e.sw_resp = cpu_to_le16(res_id); status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_share_res); @@ -5024,8 +5018,8 @@ ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm, entry->chain_idx = chain_idx; content->result_indx = ICE_AQ_RECIPE_RESULT_EN | - ((chain_idx << ICE_AQ_RECIPE_RESULT_DATA_S) & - ICE_AQ_RECIPE_RESULT_DATA_M); + FIELD_PREP(ICE_AQ_RECIPE_RESULT_DATA_M, + chain_idx); clear_bit(chain_idx, result_idx_bm); chain_idx = find_first_bit(result_idx_bm, ICE_MAX_FV_WORDS); @@ -6065,6 +6059,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, rinfo->sw_act.fltr_act == ICE_FWD_TO_Q || rinfo->sw_act.fltr_act == ICE_FWD_TO_QGRP || rinfo->sw_act.fltr_act == ICE_DROP_PACKET || + rinfo->sw_act.fltr_act == ICE_MIRROR_PACKET || rinfo->sw_act.fltr_act == ICE_NOP)) { status = -EIO; goto free_pkt_profile; @@ -6077,9 +6072,11 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI || - rinfo->sw_act.fltr_act == ICE_NOP) + rinfo->sw_act.fltr_act == ICE_MIRROR_PACKET || + rinfo->sw_act.fltr_act == ICE_NOP) { rinfo->sw_act.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + } if (rinfo->src_vsi) rinfo->sw_act.src = ice_get_hw_vsi_num(hw, rinfo->src_vsi); @@ -6115,38 +6112,45 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, status = -ENOMEM; goto free_pkt_profile; } - if (!rinfo->flags_info.act_valid) { - act |= ICE_SINGLE_ACT_LAN_ENABLE; - act |= ICE_SINGLE_ACT_LB_ENABLE; - } else { - act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE | - ICE_SINGLE_ACT_LB_ENABLE); + + if (rinfo->sw_act.fltr_act != ICE_MIRROR_PACKET) { + if (!rinfo->flags_info.act_valid) { + act |= ICE_SINGLE_ACT_LAN_ENABLE; + act |= ICE_SINGLE_ACT_LB_ENABLE; + } else { + act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_LB_ENABLE); + } } switch (rinfo->sw_act.fltr_act) { case ICE_FWD_TO_VSI: - act |= (rinfo->sw_act.fwd_id.hw_vsi_id << - ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, + rinfo->sw_act.fwd_id.hw_vsi_id); act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; break; case ICE_FWD_TO_Q: act |= ICE_SINGLE_ACT_TO_Q; - act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & - ICE_SINGLE_ACT_Q_INDEX_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_Q_INDEX_M, + rinfo->sw_act.fwd_id.q_id); break; case ICE_FWD_TO_QGRP: q_rgn = rinfo->sw_act.qgrp_size > 0 ? (u8)ilog2(rinfo->sw_act.qgrp_size) : 0; act |= ICE_SINGLE_ACT_TO_Q; - act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & - ICE_SINGLE_ACT_Q_INDEX_M; - act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) & - ICE_SINGLE_ACT_Q_REGION_M; + act |= FIELD_PREP(ICE_SINGLE_ACT_Q_INDEX_M, + rinfo->sw_act.fwd_id.q_id); + act |= FIELD_PREP(ICE_SINGLE_ACT_Q_REGION_M, q_rgn); break; case ICE_DROP_PACKET: act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP | ICE_SINGLE_ACT_VALID_BIT; break; + case ICE_MIRROR_PACKET: + act |= ICE_SINGLE_ACT_OTHER_ACTS; + act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, + rinfo->sw_act.fwd_id.hw_vsi_id); + break; case ICE_NOP: act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, rinfo->sw_act.fwd_id.hw_vsi_id); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 08d3bbf4b44c..b890410a2bc0 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -689,6 +689,41 @@ ice_tc_setup_drop_action(struct net_device *filter_dev, return 0; } +static int ice_tc_setup_mirror_action(struct net_device *filter_dev, + struct ice_tc_flower_fltr *fltr, + struct net_device *target_dev) +{ + struct ice_repr *repr; + + fltr->action.fltr_act = ICE_MIRROR_PACKET; + + if (ice_is_port_repr_netdev(filter_dev) && + ice_is_port_repr_netdev(target_dev)) { + repr = ice_netdev_to_repr(target_dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_is_port_repr_netdev(filter_dev) && + ice_tc_is_dev_uplink(target_dev)) { + repr = ice_netdev_to_repr(filter_dev); + + fltr->dest_vsi = repr->src_vsi->back->eswitch.uplink_vsi; + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else if (ice_tc_is_dev_uplink(filter_dev) && + ice_is_port_repr_netdev(target_dev)) { + repr = ice_netdev_to_repr(target_dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + return 0; +} + static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, struct ice_tc_flower_fltr *fltr, struct flow_action_entry *act) @@ -710,6 +745,12 @@ static int ice_eswitch_tc_parse_action(struct net_device *filter_dev, break; + case FLOW_ACTION_MIRRED: + err = ice_tc_setup_mirror_action(filter_dev, fltr, act->dev); + if (err) + return err; + break; + default: NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action in switchdev mode"); return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 59617f055e35..74d13cc5a3a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1493,9 +1493,9 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) * be static in non-adaptive mode (user configured) */ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), - ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) & - GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | - GLINT_DYN_CTL_WB_ON_ITR_M); + FIELD_PREP(GLINT_DYN_CTL_ITR_INDX_M, ICE_ITR_NONE) | + FIELD_PREP(GLINT_DYN_CTL_INTENA_MSK_M, 1) | + FIELD_PREP(GLINT_DYN_CTL_WB_ON_ITR_M, 1)); q_vector->wb_on_itr = true; } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 6df7c4487ad0..41ab6d7bbd9e 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -353,6 +353,7 @@ struct ice_ts_func_info { #define ICE_TS_TMR0_ENA_M BIT(25) #define ICE_TS_TMR1_ENA_M BIT(26) #define ICE_TS_LL_TX_TS_READ_M BIT(28) +#define ICE_TS_LL_TX_TS_INT_READ_M BIT(29) struct ice_ts_dev_info { /* Device specific info */ @@ -366,6 +367,7 @@ struct ice_ts_dev_info { u8 tmr0_ena; u8 tmr1_ena; u8 ts_ll_read; + u8 ts_ll_int_read; }; /* Function specific capabilities */ @@ -1001,7 +1003,6 @@ struct ice_hw_port_stats { u64 error_bytes; /* errbc */ u64 mac_local_faults; /* mlfc */ u64 mac_remote_faults; /* mrfc */ - u64 rx_len_errors; /* rlec */ u64 link_xon_rx; /* lxonrxc */ u64 link_xoff_rx; /* lxoffrxc */ u64 link_xon_tx; /* lxontxc */ @@ -1040,6 +1041,7 @@ enum ice_sw_fwd_act_type { ICE_FWD_TO_Q, ICE_FWD_TO_QGRP, ICE_DROP_PACKET, + ICE_MIRROR_PACKET, ICE_NOP, ICE_INVAL_ACT }; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index d6f74513b495..2ffdae9a82df 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -248,29 +248,44 @@ static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf) } /** - * ice_vf_recreate_vsi - Release and re-create the VF's VSI - * @vf: VF to recreate the VSI for + * ice_vf_reconfig_vsi - Reconfigure a VF VSI with the device + * @vf: VF to reconfigure the VSI for * - * This is only called when a single VF is being reset (i.e. VVF, VFLR, host - * VF configuration change, etc) + * This is called when a single VF is being reset (i.e. VVF, VFLR, host VF + * configuration change, etc). * - * It releases and then re-creates a new VSI. + * It brings the VSI down and then reconfigures it with the hardware. */ -static int ice_vf_recreate_vsi(struct ice_vf *vf) +int ice_vf_reconfig_vsi(struct ice_vf *vf) { + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_vsi_cfg_params params = {}; struct ice_pf *pf = vf->pf; int err; - ice_vf_vsi_release(vf); + if (WARN_ON(!vsi)) + return -EINVAL; + + params = ice_vsi_to_params(vsi); + params.flags = ICE_VSI_FLAG_NO_INIT; - err = vf->vf_ops->create_vsi(vf); + ice_vsi_decfg(vsi); + ice_fltr_remove_all(vsi); + + err = ice_vsi_cfg(vsi, ¶ms); if (err) { dev_err(ice_pf_to_dev(pf), - "Failed to recreate the VF%u's VSI, error %d\n", + "Failed to reconfigure the VF%u's VSI, error %d\n", vf->vf_id, err); return err; } + /* Update the lan_vsi_num field since it might have been changed. The + * PF lan_vsi_idx number remains the same so we don't need to change + * that. + */ + vf->lan_vsi_num = vsi->vsi_num; + return 0; } @@ -928,7 +943,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_vf_pre_vsi_rebuild(vf); - if (ice_vf_recreate_vsi(vf)) { + if (ice_vf_reconfig_vsi(vf)) { dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id); err = -EFAULT; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 35866553f288..0cc9034065c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -62,7 +62,6 @@ struct ice_vf_ops { bool (*poll_reset_status)(struct ice_vf *vf); void (*clear_reset_trigger)(struct ice_vf *vf); void (*irq_close)(struct ice_vf *vf); - int (*create_vsi)(struct ice_vf *vf); void (*post_vsi_rebuild)(struct ice_vf *vf); }; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h index 0c7e77c0a09f..91ba7fe0eaee 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -23,6 +23,7 @@ #warning "Only include ice_vf_lib_private.h in CONFIG_PCI_IOV virtualization files" #endif +int ice_vf_reconfig_vsi(struct ice_vf *vf); void ice_initialize_vf_entry(struct ice_vf *vf); void ice_dis_vf_qs(struct ice_vf *vf); int ice_check_vf_init(struct ice_vf *vf); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index d4ad0739b57b..c925813ec9ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3102,7 +3102,7 @@ static struct ice_vlan ice_vc_to_vlan(struct virtchnl_vlan *vc_vlan) { struct ice_vlan vlan = { 0 }; - vlan.prio = (vc_vlan->tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + vlan.prio = FIELD_GET(VLAN_PRIO_MASK, vc_vlan->tci); vlan.vid = vc_vlan->tci & VLAN_VID_MASK; vlan.tpid = vc_vlan->tpid; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 9ee7ab207b37..f001553e1a1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1463,16 +1463,15 @@ ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, int ret; stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0); - if (((stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >> - ICE_FXD_FLTR_WB_QW1_DD_S) != ICE_FXD_FLTR_WB_QW1_DD_YES) { + if (FIELD_GET(ICE_FXD_FLTR_WB_QW1_DD_M, stat_err) != + ICE_FXD_FLTR_WB_QW1_DD_YES) { *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; dev_err(dev, "VF %d: Desc Done not set\n", vf->vf_id); ret = -EINVAL; goto err_exit; } - prog_id = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >> - ICE_FXD_FLTR_WB_QW1_PROG_ID_S; + prog_id = FIELD_GET(ICE_FXD_FLTR_WB_QW1_PROG_ID_M, stat_err); if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD && ctx->v_opcode != VIRTCHNL_OP_ADD_FDIR_FILTER) { dev_err(dev, "VF %d: Desc show add, but ctx not", @@ -1491,8 +1490,7 @@ ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, goto err_exit; } - error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >> - ICE_FXD_FLTR_WB_QW1_FAIL_S; + error = FIELD_GET(ICE_FXD_FLTR_WB_QW1_FAIL_M, stat_err); if (error == ICE_FXD_FLTR_WB_QW1_FAIL_YES) { if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD) { dev_err(dev, "VF %d, Failed to add FDIR rule due to no space in the table", @@ -1507,8 +1505,7 @@ ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, goto err_exit; } - error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M) >> - ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S; + error = FIELD_GET(ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M, stat_err); if (error == ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES) { dev_err(dev, "VF %d: Profile matching error", vf->vf_id); *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c index 76266e709a39..2e9ad27cb9d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c @@ -131,6 +131,7 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) { struct ice_hw *hw = &vsi->back->hw; struct ice_vsi_ctx *ctxt; + u8 *ivf; int err; /* do not allow modifying VLAN stripping when a port VLAN is configured @@ -143,19 +144,24 @@ static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) if (!ctxt) return -ENOMEM; + ivf = &ctxt->info.inner_vlan_flags; + /* Here we are configuring what the VSI should do with the VLAN tag in * the Rx packet. We can either leave the tag in the packet or put it in * the Rx descriptor. */ - if (ena) + if (ena) { /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH; - else + *ivf = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH); + } else { /* Disable stripping. Leave tag in packet */ - ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING; + *ivf = FIELD_PREP(ICE_AQ_VSI_INNER_VLAN_EMODE_M, + ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING); + } /* Allow all packets untagged/tagged */ - ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; + *ivf |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL; ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); @@ -481,10 +487,11 @@ int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid) ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags & ~(ICE_AQ_VSI_OUTER_VLAN_EMODE_M | ICE_AQ_VSI_OUTER_TAG_TYPE_M); ctxt->info.outer_vlan_flags |= - ((ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH << - ICE_AQ_VSI_OUTER_VLAN_EMODE_S) | - ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & - ICE_AQ_VSI_OUTER_TAG_TYPE_M)); + /* we want EMODE_SHOW_BOTH, but that value is zero, so the line + * above clears it well enough that we don't need to try to set + * zero here, so just do the tag type + */ + FIELD_PREP(ICE_AQ_VSI_OUTER_TAG_TYPE_M, tag_type); err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (err) @@ -589,11 +596,9 @@ int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid) ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M | ICE_AQ_VSI_OUTER_TAG_TYPE_M); ctxt->info.outer_vlan_flags |= - ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << - ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & - ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M) | - ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & - ICE_AQ_VSI_OUTER_TAG_TYPE_M); + FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M, + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL) | + FIELD_PREP(ICE_AQ_VSI_OUTER_TAG_TYPE_M, tag_type); err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (err) @@ -642,9 +647,8 @@ int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi) ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M); ctxt->info.outer_vlan_flags |= ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC | - ((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL << - ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) & - ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M); + FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M, + ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL); err = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (err) @@ -702,8 +706,7 @@ __ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid) ctxt->info.outer_vlan_flags = (ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW << ICE_AQ_VSI_OUTER_VLAN_EMODE_S) | - ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) & - ICE_AQ_VSI_OUTER_TAG_TYPE_M) | + FIELD_PREP(ICE_AQ_VSI_OUTER_TAG_TYPE_M, tag_type) | ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC | (ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) | diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 81288a17da2a..447753495c53 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -328,10 +328,9 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, if (offload->tso_segs) { qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S; - qw1 |= ((u64)offload->tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) & - IDPF_TXD_CTX_QW1_TSO_LEN_M; - qw1 |= ((u64)offload->mss << IDPF_TXD_CTX_QW1_MSS_S) & - IDPF_TXD_CTX_QW1_MSS_M; + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M, + offload->tso_len); + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss); u64_stats_update_begin(&txq->stats_sync); u64_stats_inc(&txq->q_stats.tx.lso_pkts); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index a005626129a5..ad730d20fbe6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -505,9 +505,9 @@ static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) /* store the buffer ID and the SW maintained GEN bit to the refillq */ refillq->ring[nta] = - ((buf_id << IDPF_RX_BI_BUFID_S) & IDPF_RX_BI_BUFID_M) | - (!!(test_bit(__IDPF_Q_GEN_CHK, refillq->flags)) << - IDPF_RX_BI_GEN_S); + FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RX_BI_GEN_M, + test_bit(__IDPF_Q_GEN_CHK, refillq->flags)); if (unlikely(++nta == refillq->desc_count)) { nta = 0; @@ -1825,14 +1825,14 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, u16 gen; /* if the descriptor isn't done, no work yet to do */ - gen = (le16_to_cpu(tx_desc->qid_comptype_gen) & - IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S; + gen = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_GEN_M); if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen) break; /* Find necessary info of TX queue to clean buffers */ - rel_tx_qid = (le16_to_cpu(tx_desc->qid_comptype_gen) & - IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S; + rel_tx_qid = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_QID_M); if (rel_tx_qid >= complq->txq_grp->num_txq || !complq->txq_grp->txqs[rel_tx_qid]) { dev_err(&complq->vport->adapter->pdev->dev, @@ -1842,9 +1842,8 @@ static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, tx_q = complq->txq_grp->txqs[rel_tx_qid]; /* Determine completion type */ - ctype = (le16_to_cpu(tx_desc->qid_comptype_gen) & - IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> - IDPF_TXD_COMPLQ_COMPL_TYPE_S; + ctype = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_COMPL_TYPE_M); switch (ctype) { case IDPF_TXD_COMPLT_RE: hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head); @@ -1945,11 +1944,10 @@ void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc, u16 td_cmd, u16 size) { desc->q.qw1.cmd_dtype = - cpu_to_le16(params->dtype & IDPF_FLEX_TXD_QW1_DTYPE_M); + le16_encode_bits(params->dtype, IDPF_FLEX_TXD_QW1_DTYPE_M); desc->q.qw1.cmd_dtype |= - cpu_to_le16((td_cmd << IDPF_FLEX_TXD_QW1_CMD_S) & - IDPF_FLEX_TXD_QW1_CMD_M); - desc->q.qw1.buf_size = cpu_to_le16((u16)size); + le16_encode_bits(td_cmd, IDPF_FLEX_TXD_QW1_CMD_M); + desc->q.qw1.buf_size = cpu_to_le16(size); desc->q.qw1.l2tags.l2tag1 = cpu_to_le16(params->td_tag); } @@ -2843,8 +2841,9 @@ static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_n qword1); csum->ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M, qword0); - csum->raw_csum_inv = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M, - le16_to_cpu(rx_desc->ptype_err_fflags0)); + csum->raw_csum_inv = + le16_get_bits(rx_desc->ptype_err_fflags0, + VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M); csum->raw_csum = le16_to_cpu(rx_desc->misc.raw_cs); } @@ -2938,8 +2937,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, struct idpf_rx_ptype_decoded decoded; u16 rx_ptype; - rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M, - le16_to_cpu(rx_desc->ptype_err_fflags0)); + rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0, + VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M); decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; /* If we don't know the ptype we can't do anything else with it. Just @@ -2953,8 +2952,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, skb->protocol = eth_type_trans(skb, rxq->vport->netdev); - if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M, - le16_to_cpu(rx_desc->hdrlen_flags))) + if (le16_get_bits(rx_desc->hdrlen_flags, + VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); idpf_rx_splitq_extract_csum_bits(rx_desc, &csum_bits); @@ -3148,8 +3147,8 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) dma_rmb(); /* if the descriptor isn't done, no work yet to do */ - gen_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id); - gen_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M, gen_id); + gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M); if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id) break; @@ -3164,9 +3163,8 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) continue; } - pkt_len = le16_to_cpu(rx_desc->pktlen_gen_bufq_id); - pkt_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M, - pkt_len); + pkt_len = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M); hbo = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M, rx_desc->status_err0_qw1); @@ -3183,14 +3181,12 @@ static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) goto bypass_hsplit; } - hdr_len = le16_to_cpu(rx_desc->hdrlen_flags); - hdr_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M, - hdr_len); + hdr_len = le16_get_bits(rx_desc->hdrlen_flags, + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M); bypass_hsplit: - bufq_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id); - bufq_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M, - bufq_id); + bufq_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M); rxq_set = container_of(rxq, struct idpf_rxq_set, rxq); if (!bufq_id) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 8d6e44ee1895..64dfc362d1dc 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -222,8 +222,7 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) } /* set lan id */ - hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> - E1000_STATUS_FUNC_SHIFT; + hw->bus.func = FIELD_GET(E1000_STATUS_FUNC_MASK, rd32(E1000_STATUS)); /* Set phy->phy_addr and phy->id. */ ret_val = igb_get_phy_id_82575(hw); @@ -262,8 +261,8 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) if (ret_val) goto out; - data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> - E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; + data = FIELD_GET(E1000_M88E1112_MAC_CTRL_1_MODE_MASK, + data); if (data == E1000_M88E1112_AUTO_COPPER_SGMII || data == E1000_M88E1112_AUTO_COPPER_BASEX) hw->mac.ops.check_for_link = @@ -330,8 +329,7 @@ static s32 igb_init_nvm_params_82575(struct e1000_hw *hw) u32 eecd = rd32(E1000_EECD); u16 size; - size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> - E1000_EECD_SIZE_EX_SHIFT); + size = FIELD_GET(E1000_EECD_SIZE_EX_MASK, eecd); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. @@ -2798,7 +2796,7 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) return 0; hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); - if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) + if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg) != NVM_ETS_TYPE_EMC) return E1000_NOT_IMPLEMENTED; @@ -2808,10 +2806,8 @@ static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) for (i = 1; i < num_sensors; i++) { hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); - sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> - NVM_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> - NVM_ETS_DATA_LOC_SHIFT); + sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor); + sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor); if (sensor_location != 0) hw->phy.ops.read_i2c_byte(hw, @@ -2859,20 +2855,17 @@ static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) return 0; hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); - if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) + if (FIELD_GET(NVM_ETS_TYPE_MASK, ets_cfg) != NVM_ETS_TYPE_EMC) return E1000_NOT_IMPLEMENTED; - low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> - NVM_ETS_LTHRES_DELTA_SHIFT); + low_thresh_delta = FIELD_GET(NVM_ETS_LTHRES_DELTA_MASK, ets_cfg); num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); for (i = 1; i <= num_sensors; i++) { hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); - sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> - NVM_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> - NVM_ETS_DATA_LOC_SHIFT); + sensor_index = FIELD_GET(NVM_ETS_DATA_INDEX_MASK, ets_sensor); + sensor_location = FIELD_GET(NVM_ETS_DATA_LOC_MASK, ets_sensor); therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; hw->phy.ops.write_i2c_byte(hw, diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index b9b9d35494d2..503b239868e8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -5,9 +5,9 @@ * e1000_i211 */ -#include <linux/types.h> +#include <linux/bitfield.h> #include <linux/if_ether.h> - +#include <linux/types.h> #include "e1000_hw.h" #include "e1000_i210.h" @@ -473,7 +473,7 @@ s32 igb_read_invm_version(struct e1000_hw *hw, /* Check if we have second version location used */ else if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { - version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record); status = 0; break; } @@ -483,8 +483,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw, else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && (i != 1))) { - version = (*next_record & E1000_INVM_VER_FIELD_TWO) - >> 13; + version = FIELD_GET(E1000_INVM_VER_FIELD_TWO, + *next_record); status = 0; break; } @@ -493,15 +493,15 @@ s32 igb_read_invm_version(struct e1000_hw *hw, */ else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && ((*record & 0x3) == 0)) { - version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + version = FIELD_GET(E1000_INVM_VER_FIELD_ONE, *record); status = 0; break; } } if (!status) { - invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) - >> E1000_INVM_MAJOR_SHIFT; + invm_ver->invm_major = FIELD_GET(E1000_INVM_MAJOR_MASK, + version); invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; } /* Read Image Type */ @@ -520,7 +520,8 @@ s32 igb_read_invm_version(struct e1000_hw *hw, ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || ((((*record & 0x3) != 0) && (i != 1)))) { invm_ver->invm_img_type = - (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; + FIELD_GET(E1000_INVM_IMGTYPE_FIELD, + *next_record); status = 0; break; } diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 5a23b9cfec6c..fa3dfafd2bb1 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -56,7 +56,7 @@ s32 igb_get_bus_info_pcie(struct e1000_hw *hw) } reg = rd32(E1000_STATUS); - bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; + bus->func = FIELD_GET(E1000_STATUS_FUNC_MASK, reg); return 0; } diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index fa136e6e9328..2dcd64d6dec3 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2007 - 2018 Intel Corporation. */ -#include <linux/if_ether.h> +#include <linux/bitfield.h> #include <linux/delay.h> - +#include <linux/if_ether.h> #include "e1000_mac.h" #include "e1000_nvm.h" @@ -708,10 +708,10 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) */ if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); - fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) - >> NVM_MAJOR_SHIFT; - fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) - >> NVM_MINOR_SHIFT; + fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, + fw_version); + fw_vers->eep_minor = FIELD_GET(NVM_MINOR_MASK, + fw_version); fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); goto etrack_id; } @@ -753,15 +753,13 @@ void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) return; } hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); - fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) - >> NVM_MAJOR_SHIFT; + fw_vers->eep_major = FIELD_GET(NVM_MAJOR_MASK, fw_version); /* check for old style version format in newer images*/ if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { eeprom_verl = (fw_version & NVM_COMB_VER_MASK); } else { - eeprom_verl = (fw_version & NVM_MINOR_MASK) - >> NVM_MINOR_SHIFT; + eeprom_verl = FIELD_GET(NVM_MINOR_MASK, fw_version); } /* Convert minor value to hex before assigning to output struct * Val to be converted will not be higher than 99, per tool output diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index a018000f7db9..cd65008c7ef5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2007 - 2018 Intel Corporation. */ -#include <linux/if_ether.h> +#include <linux/bitfield.h> #include <linux/delay.h> - +#include <linux/if_ether.h> #include "e1000_mac.h" #include "e1000_phy.h" @@ -255,7 +255,7 @@ s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) } /* Need to byte-swap the 16-bit value. */ - *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); + *data = ((i2ccmd >> 8) & 0x00FF) | FIELD_PREP(0xFF00, i2ccmd); return 0; } @@ -282,7 +282,7 @@ s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) } /* Swap the data bytes for the I2C interface */ - phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); + phy_data_swapped = ((data >> 8) & 0x00FF) | FIELD_PREP(0xFF00, data); /* Set up Op-code, Phy Address, and register address in the I2CCMD * register. The MAC will take care of interfacing with the @@ -1682,8 +1682,7 @@ s32 igb_get_cable_length_m88(struct e1000_hw *hw) if (ret_val) goto out; - index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT; + index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) { ret_val = -E1000_ERR_PHY; goto out; @@ -1796,8 +1795,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) if (ret_val) goto out; - index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> - M88E1000_PSSR_CABLE_LENGTH_SHIFT; + index = FIELD_GET(M88E1000_PSSR_CABLE_LENGTH, phy_data); if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) { ret_val = -E1000_ERR_PHY; goto out; @@ -2578,8 +2576,7 @@ s32 igb_get_cable_length_82580(struct e1000_hw *hw) if (ret_val) goto out; - length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >> - I82580_DSTATUS_CABLE_LENGTH_SHIFT; + length = FIELD_GET(I82580_DSTATUS_CABLE_LENGTH, phy_data); if (length == E1000_CABLE_LENGTH_UNDEFINED) ret_val = -E1000_ERR_PHY; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index d868a70732f4..b66199c9bb3a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2432,7 +2432,7 @@ static int igb_get_ts_info(struct net_device *dev, } } -#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +#define ETHER_TYPE_FULL_MASK cpu_to_be16(FIELD_MAX(U16_MAX)) static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -2711,8 +2711,7 @@ static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter, etqf |= (etype & E1000_ETQF_ETYPE_MASK); etqf &= ~E1000_ETQF_QUEUE_MASK; - etqf |= ((input->action << E1000_ETQF_QUEUE_SHIFT) - & E1000_ETQF_QUEUE_MASK); + etqf |= FIELD_PREP(E1000_ETQF_QUEUE_MASK, input->action); etqf |= E1000_ETQF_QUEUE_ENABLE; wr32(E1000_ETQF(i), etqf); @@ -2731,8 +2730,8 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, u32 vlapqf; vlapqf = rd32(E1000_VLAPQF); - vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) - >> VLAN_PRIO_SHIFT; + vlan_priority = FIELD_GET(VLAN_PRIO_MASK, + ntohs(input->filter.vlan_tci)); queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK; /* check whether this vlan prio is already set */ @@ -2815,7 +2814,7 @@ static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter, u8 vlan_priority; u32 vlapqf; - vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + vlan_priority = FIELD_GET(VLAN_PRIO_MASK, vlan_tci); vlapqf = rd32(E1000_VLAPQF); vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b2295caa2f0a..4df8d4153aa5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7295,7 +7295,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) static int igb_set_vf_multicasts(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) { - int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int n = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); u16 *hash_list = (u16 *)&msgbuf[1]; struct vf_data_storage *vf_data = &adapter->vf_data[vf]; int i; @@ -7555,7 +7555,7 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) { - int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int add = FIELD_GET(E1000_VT_MSGINFO_MASK, msgbuf[0]); int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); int ret; @@ -9810,8 +9810,7 @@ static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, tx_rate; bcnrc_val = E1000_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) & - E1000_RTTBCNRC_RF_INT_MASK); + bcnrc_val |= FIELD_PREP(E1000_RTTBCNRC_RF_INT_MASK, rf_int); bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); } else { bcnrc_val = 0; @@ -10000,8 +9999,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) hwm = 64 * (pba - 6); reg = rd32(E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; - reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) - & E1000_FCRTC_RTH_COAL_MASK); + reg |= FIELD_PREP(E1000_FCRTC_RTH_COAL_MASK, hwm); wr32(E1000_FCRTC, reg); /* Set the DMA Coalescing Rx threshold to PBA - 2 * max @@ -10010,8 +10008,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) dmac_thr = pba - 10; reg = rd32(E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; - reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) - & E1000_DMACR_DMACTHR_MASK); + reg |= FIELD_PREP(E1000_DMACR_DMACTHR_MASK, dmac_thr); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c index a3cd7ac48d4b..d15282ee5ea8 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.c +++ b/drivers/net/ethernet/intel/igbvf/mbx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2009 - 2018 Intel Corporation. */ +#include <linux/bitfield.h> #include "mbx.h" /** diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index fd712585af27..a4d4f00e6a87 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -3,25 +3,25 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/vmalloc.h> -#include <linux/pagemap.h> +#include <linux/bitfield.h> #include <linux/delay.h> -#include <linux/netdevice.h> -#include <linux/tcp.h> -#include <linux/ipv6.h> -#include <linux/slab.h> -#include <net/checksum.h> -#include <net/ip6_checksum.h> -#include <linux/mii.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> +#include <linux/init.h> +#include <linux/ipv6.h> +#include <linux/mii.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/pagemap.h> +#include <linux/pci.h> #include <linux/prefetch.h> #include <linux/sctp.h> - +#include <linux/slab.h> +#include <linux/tcp.h> +#include <linux/types.h> +#include <linux/vmalloc.h> +#include <net/checksum.h> +#include <net/ip6_checksum.h> #include "igbvf.h" char igbvf_driver_name[] = "igbvf"; @@ -273,9 +273,8 @@ static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter, * that case, it fills the header buffer and spills the rest * into the page. */ - hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) - & E1000_RXDADV_HDRBUFLEN_MASK) >> - E1000_RXDADV_HDRBUFLEN_SHIFT; + hlen = le16_get_bits(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info, + E1000_RXDADV_HDRBUFLEN_MASK); if (hlen > adapter->rx_ps_hdr_size) hlen = adapter->rx_ps_hdr_size; diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index a1d815af507d..9fae8bdec2a7 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -68,8 +68,7 @@ static s32 igc_init_nvm_params_base(struct igc_hw *hw) u32 eecd = rd32(IGC_EECD); u16 size; - size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >> - IGC_EECD_SIZE_EX_SHIFT); + size = FIELD_GET(IGC_EECD_SIZE_EX_MASK, eecd); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. @@ -162,8 +161,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) phy->reset_delay_us = 100; /* set lan id */ - hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >> - IGC_STATUS_FUNC_SHIFT; + hw->bus.func = FIELD_GET(IGC_STATUS_FUNC_MASK, rd32(IGC_STATUS)); /* Make sure the PHY is in a good state. Several people have reported * firmware leaving the PHY's page select register set to something diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 17546a035ab1..0dd61719f1ed 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Intel Corporation */ +#include <linux/bitfield.h> #include <linux/delay.h> #include "igc_hw.h" @@ -578,9 +579,8 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) /* Calculate tw_system (nsec). */ if (speed == SPEED_100) { - tw_system = ((rd32(IGC_EEE_SU) & - IGC_TW_SYSTEM_100_MASK) >> - IGC_TW_SYSTEM_100_SHIFT) * 500; + tw_system = FIELD_GET(IGC_TW_SYSTEM_100_MASK, + rd32(IGC_EEE_SU)) * 500; } else { tw_system = (rd32(IGC_EEE_SU) & IGC_TW_SYSTEM_1000_MASK) * 500; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 61db1d3bfa0b..ba8d3fe186ae 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3452,8 +3452,8 @@ static int igc_write_flex_filter_ll(struct igc_adapter *adapter, /* Configure filter */ queuing = input->length & IGC_FHFT_LENGTH_MASK; - queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK; - queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK; + queuing |= FIELD_PREP(IGC_FHFT_QUEUE_MASK, input->rx_queue); + queuing |= FIELD_PREP(IGC_FHFT_PRIO_MASK, input->prio); if (input->immediate_irq) queuing |= IGC_FHFT_IMM_INT; @@ -3712,8 +3712,7 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter, } if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { - int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> - VLAN_PRIO_SHIFT; + int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); err = igc_add_vlan_prio_filter(adapter, prio, rule->action); if (err) @@ -3735,8 +3734,7 @@ static void igc_disable_nfc_rule(struct igc_adapter *adapter, igc_del_etype_filter(adapter, rule->filter.etype); if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { - int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> - VLAN_PRIO_SHIFT; + int prio = FIELD_GET(VLAN_PRIO_MASK, rule->filter.vlan_tci); igc_del_vlan_prio_filter(adapter, prio); } diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 53b77c969c85..7cd8716d2ffa 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Intel Corporation */ +#include <linux/bitfield.h> #include "igc_phy.h" /** @@ -726,7 +727,7 @@ static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr, */ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) { - u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + u8 dev_addr = FIELD_GET(GPY_MMD_MASK, offset); s32 ret_val; offset = offset & GPY_REG_MASK; @@ -757,7 +758,7 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) */ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) { - u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + u8 dev_addr = FIELD_GET(GPY_MMD_MASK, offset); s32 ret_val; offset = offset & GPY_REG_MASK; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 100388968e4d..6835d5f18753 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -123,14 +123,14 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) if (ret_val) return ret_val; if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; /* Check to see if SFP+ module is supported */ ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, &data_offset); if (ret_val) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; break; default: break; @@ -213,7 +213,7 @@ static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, break; default: - return IXGBE_ERR_LINK_SETUP; + return -EIO; } return 0; @@ -283,7 +283,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) /* Validate the water mark configuration */ if (!hw->fc.pause_time) - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; /* Low water mark of zero causes XOFF floods */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { @@ -292,7 +292,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) if (!hw->fc.low_water[i] || hw->fc.low_water[i] >= hw->fc.high_water[i]) { hw_dbg(hw, "Invalid water mark configuration\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } } } @@ -369,7 +369,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Set 802.3x based flow control settings. */ @@ -438,7 +438,7 @@ static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autonegotiation did not complete.\n"); } } @@ -478,7 +478,7 @@ static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) { hw_dbg(hw, "Link was indicated but link is down\n"); - return IXGBE_ERR_LINK_SETUP; + return -EIO; } return 0; @@ -594,7 +594,7 @@ static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, speed &= link_capabilities; if (speed == IXGBE_LINK_SPEED_UNKNOWN) - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; /* Set KX4/KX support according to speed requested */ else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN || @@ -701,9 +701,9 @@ static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) /* Init PHY and function pointers, perform SFP setup */ phy_status = hw->phy.ops.init(hw); - if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (phy_status == -EOPNOTSUPP) return phy_status; - if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) + if (phy_status == -ENOENT) goto mac_reset_top; hw->phy.ops.reset(hw); @@ -727,7 +727,7 @@ mac_reset_top: udelay(1); } if (ctrl & IXGBE_CTRL_RST) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -789,12 +789,12 @@ static s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); rar_high &= ~IXGBE_RAH_VIND_MASK; - rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK); + rar_high |= FIELD_PREP(IXGBE_RAH_VIND_MASK, vmdq); IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); return 0; } @@ -814,7 +814,7 @@ static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); @@ -845,7 +845,7 @@ static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, u32 vftabyte; if (vlan > 4095) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Determine 32-bit word position in array */ regindex = (vlan >> 5) & 0x7F; /* upper seven bits */ @@ -964,7 +964,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, gssr = IXGBE_GSSR_PHY0_SM; if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; if (hw->phy.type == ixgbe_phy_nl) { /* @@ -993,7 +993,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) { hw_dbg(hw, "EEPROM read did not pass.\n"); - status = IXGBE_ERR_SFP_NOT_PRESENT; + status = -ENOENT; goto out; } @@ -1003,7 +1003,7 @@ static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr, *eeprom_data = (u8)(sfp_data >> 8); } else { - status = IXGBE_ERR_PHY; + status = -EIO; } out: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 58ea959a4482..339e106a5732 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -117,7 +117,7 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; if (hw->eeprom.ops.read(hw, ++data_offset, &data_value)) goto setup_sfp_err; @@ -144,7 +144,7 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) if (ret_val) { hw_dbg(hw, " sfp module setup not complete\n"); - return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + return -EIO; } } @@ -159,7 +159,7 @@ setup_sfp_err: usleep_range(hw->eeprom.semaphore_delay * 1000, hw->eeprom.semaphore_delay * 2000); hw_err(hw, "eeprom read at offset %d failed\n", data_offset); - return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + return -EIO; } /** @@ -184,7 +184,7 @@ static s32 prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked, ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; *locked = true; } @@ -219,7 +219,7 @@ static s32 prot_autoc_write_82599(struct ixgbe_hw *hw, u32 autoc, bool locked) ret_val = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); if (ret_val) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; locked = true; } @@ -400,7 +400,7 @@ static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, break; default: - return IXGBE_ERR_LINK_SETUP; + return -EIO; } if (hw->phy.multispeed_fiber) { @@ -541,7 +541,7 @@ static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autoneg did not complete.\n"); } } @@ -794,7 +794,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, speed &= link_capabilities; if (speed == IXGBE_LINK_SPEED_UNKNOWN) - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/ if (hw->mac.orig_link_settings_stored) @@ -861,8 +861,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, msleep(100); } if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { - status = - IXGBE_ERR_AUTONEG_NOT_COMPLETE; + status = -EIO; hw_dbg(hw, "Autoneg did not complete.\n"); } } @@ -927,7 +926,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) /* Identify PHY and related function pointers */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Setup SFP module if there is one present. */ @@ -936,7 +935,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Reset PHY */ @@ -974,7 +973,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -1093,7 +1092,7 @@ static s32 ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd) udelay(10); } - return IXGBE_ERR_FDIR_CMD_INCOMPLETE; + return -EIO; } /** @@ -1155,7 +1154,7 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) } if (i >= IXGBE_FDIR_INIT_DONE_POLL) { hw_dbg(hw, "Flow Director Signature poll time exceeded!\n"); - return IXGBE_ERR_FDIR_REINIT_FAILED; + return -EIO; } /* Clear FDIR statistics registers (read to clear) */ @@ -1387,7 +1386,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on flow type input\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* configure FDIRCMD register */ @@ -1546,7 +1545,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on vm pool mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) { @@ -1555,14 +1554,14 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, if (input_mask->formatted.dst_port || input_mask->formatted.src_port) { hw_dbg(hw, " Error on src/dst port mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } break; case IXGBE_ATR_L4TYPE_MASK: break; default: hw_dbg(hw, " Error on flow type mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (ntohs(input_mask->formatted.vlan_id) & 0xEFFF) { @@ -1583,7 +1582,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on VLAN mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch ((__force u16)input_mask->formatted.flex_bytes & 0xFFFF) { @@ -1595,7 +1594,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, break; default: hw_dbg(hw, " Error on flexible byte mask\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ @@ -1824,7 +1823,7 @@ static s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw) /* Return error if SFP module has been detected but is not supported */ if (hw->phy.type == ixgbe_phy_sfp_unsupported) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; return status; } @@ -1863,13 +1862,13 @@ static s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval) * Verifies that installed the firmware version is 0.6 or higher * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. * - * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or - * if the FW version is not supported. + * Return: -EACCES if the FW is not present or if the FW version is + * not supported. **/ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) { - s32 status = IXGBE_ERR_EEPROM_VERSION; u16 fw_offset, fw_ptp_cfg_offset; + s32 status = -EACCES; u16 offset; u16 fw_version = 0; @@ -1883,7 +1882,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) goto fw_version_err; if (fw_offset == 0 || fw_offset == 0xFFFF) - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; /* get the offset to the Pass Through Patch Configuration block */ offset = fw_offset + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR; @@ -1891,7 +1890,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) goto fw_version_err; if (fw_ptp_cfg_offset == 0 || fw_ptp_cfg_offset == 0xFFFF) - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; /* get the firmware version */ offset = fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4; @@ -1905,7 +1904,7 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) fw_version_err: hw_err(hw, "eeprom read at offset %d failed\n", offset); - return IXGBE_ERR_EEPROM_VERSION; + return -EACCES; } /** @@ -2038,7 +2037,7 @@ static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw) if (!(anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK)) { hw_dbg(hw, "auto negotiation not completed\n"); - ret_val = IXGBE_ERR_RESET_FAILED; + ret_val = -EIO; goto reset_pipeline_out; } @@ -2087,7 +2086,7 @@ static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, if (!timeout) { hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; goto release_i2c_access; } } @@ -2141,7 +2140,7 @@ static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, if (!timeout) { hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; goto release_i2c_access; } } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 878dd8dff528..2e6e0365154a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -124,7 +124,7 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw) */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } /* @@ -215,7 +215,7 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } if (hw->mac.type != ixgbe_mac_X540) { @@ -500,7 +500,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, if (pba_num == NULL) { hw_dbg(hw, "PBA string buffer was null\n"); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data); @@ -526,7 +526,7 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, /* we will need 11 characters to store the PBA */ if (pba_num_size < 11) { hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; + return -ENOSPC; } /* extract hex string from data and pba_ptr */ @@ -563,13 +563,13 @@ s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, if (length == 0xFFFF || length == 0) { hw_dbg(hw, "NVM PBA number section invalid length\n"); - return IXGBE_ERR_PBA_SECTION; + return -EIO; } /* check if pba_num buffer is big enough */ if (pba_num_size < (((u32)length * 2) - 1)) { hw_dbg(hw, "PBA string buffer too small\n"); - return IXGBE_ERR_NO_SPACE; + return -ENOSPC; } /* trim pba length from start of string */ @@ -684,7 +684,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) u32 reg; reg = IXGBE_READ_REG(hw, IXGBE_STATUS); - bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT; + bus->func = FIELD_GET(IXGBE_STATUS_LAN_ID, reg); bus->lan_id = bus->func; /* check for a port swap */ @@ -695,8 +695,8 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) /* Get MAC instance from EEPROM for configuring CS4227 */ if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) { hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4); - bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >> - IXGBE_EE_CTRL_4_INST_ID_SHIFT; + bus->instance_id = FIELD_GET(IXGBE_EE_CTRL_4_INST_ID, + ee_ctrl_4); } } @@ -805,7 +805,7 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); @@ -826,7 +826,7 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn off the LED, set mode to OFF. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); @@ -870,10 +870,9 @@ s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) * SPI EEPROM is assumed here. This code would need to * change if a future EEPROM is not SPI. */ - eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> - IXGBE_EEC_SIZE_SHIFT); + eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); eeprom->word_size = BIT(eeprom_size + - IXGBE_EEPROM_WORD_SIZE_SHIFT); + IXGBE_EEPROM_WORD_SIZE_SHIFT); } if (eec & IXGBE_EEC_ADDR_SIZE) @@ -904,11 +903,8 @@ s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset + words > hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || (offset + words > hw->eeprom.word_size)) + return -EINVAL; /* * The EEPROM page size cannot be queried from the chip. We do lazy @@ -962,7 +958,7 @@ static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, if (ixgbe_ready_eeprom(hw) != 0) { ixgbe_release_eeprom(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } for (i = 0; i < words; i++) { @@ -1028,7 +1024,7 @@ s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data) hw->eeprom.ops.init_params(hw); if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + return -EINVAL; return ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data); } @@ -1050,11 +1046,8 @@ s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset + words > hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || (offset + words > hw->eeprom.word_size)) + return -EINVAL; /* * We cannot hold synchronization semaphores for too long @@ -1099,7 +1092,7 @@ static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, if (ixgbe_ready_eeprom(hw) != 0) { ixgbe_release_eeprom(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } for (i = 0; i < words; i++) { @@ -1142,7 +1135,7 @@ s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + return -EINVAL; return ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); } @@ -1165,11 +1158,8 @@ s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || offset >= hw->eeprom.word_size) + return -EINVAL; for (i = 0; i < words; i++) { eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | @@ -1262,11 +1252,8 @@ s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, hw->eeprom.ops.init_params(hw); - if (words == 0) - return IXGBE_ERR_INVALID_ARGUMENT; - - if (offset >= hw->eeprom.word_size) - return IXGBE_ERR_EEPROM; + if (words == 0 || offset >= hw->eeprom.word_size) + return -EINVAL; for (i = 0; i < words; i++) { eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | @@ -1328,7 +1315,7 @@ static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg) } udelay(5); } - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -1344,7 +1331,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) u32 i; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) != 0) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); @@ -1366,7 +1353,7 @@ static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) hw_dbg(hw, "Could not acquire EEPROM grant\n"); hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Setup EEPROM for Read/Write */ @@ -1419,7 +1406,7 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw)); if (swsm & IXGBE_SWSM_SMBI) { hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } } @@ -1447,7 +1434,7 @@ static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) if (i >= timeout) { hw_dbg(hw, "SWESMBI Software EEPROM semaphore not granted.\n"); ixgbe_release_eeprom_semaphore(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } return 0; @@ -1503,7 +1490,7 @@ static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) */ if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) { hw_dbg(hw, "SPI EEPROM Status error\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } return 0; @@ -1715,7 +1702,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { if (hw->eeprom.ops.read(hw, i, &pointer)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* If the pointer seems invalid */ @@ -1724,7 +1711,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) if (hw->eeprom.ops.read(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } if (length == 0xFFFF || length == 0) @@ -1733,7 +1720,7 @@ s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) for (j = pointer + 1; j <= pointer + length; j++) { if (hw->eeprom.ops.read(hw, j, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -1786,7 +1773,7 @@ s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, * calculated checksum */ if (read_checksum != checksum) - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; /* If the user cares, return the calculated checksum */ if (checksum_val) @@ -1845,7 +1832,7 @@ s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } /* setup VMDq pool selection before this RAR gets enabled */ @@ -1897,7 +1884,7 @@ s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", index); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } /* @@ -2146,7 +2133,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) /* Validate the water mark configuration. */ if (!hw->fc.pause_time) - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; /* Low water mark of zero causes XOFF floods */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { @@ -2155,7 +2142,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) if (!hw->fc.low_water[i] || hw->fc.low_water[i] >= hw->fc.high_water[i]) { hw_dbg(hw, "Invalid water mark configuration\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } } } @@ -2212,7 +2199,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) break; default: hw_dbg(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } /* Set 802.3x based flow control settings. */ @@ -2269,7 +2256,7 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EINVAL; if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) { /* @@ -2321,7 +2308,7 @@ static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw) linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) || (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1)) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); @@ -2353,12 +2340,12 @@ static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw) */ links = IXGBE_READ_REG(hw, IXGBE_LINKS); if ((links & IXGBE_LINKS_KX_AN_COMP) == 0) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; if (hw->mac.type == ixgbe_mac_82599EB) { links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2); if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0) - return IXGBE_ERR_FC_NOT_NEGOTIATED; + return -EIO; } /* * Read the 10g AN autoc and LP ability registers and resolve @@ -2407,8 +2394,8 @@ static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw) **/ void ixgbe_fc_autoneg(struct ixgbe_hw *hw) { - s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; ixgbe_link_speed speed; + s32 ret_val = -EIO; bool link_up; /* @@ -2510,7 +2497,7 @@ static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Disables PCI-Express primary access and verifies there are no pending - * requests. IXGBE_ERR_PRIMARY_REQUESTS_PENDING is returned if primary disable + * requests. -EALREADY is returned if primary disable * bit hasn't caused the primary requests to be disabled, else 0 * is returned signifying primary requests disabled. **/ @@ -2575,7 +2562,7 @@ gio_disable_fail: } hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); - return IXGBE_ERR_PRIMARY_REQUESTS_PENDING; + return -EALREADY; } /** @@ -2600,7 +2587,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask) * SW_FW_SYNC bits (not just NVM) */ if (ixgbe_get_eeprom_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); if (!(gssr & (fwmask | swmask))) { @@ -2620,7 +2607,7 @@ s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask) ixgbe_release_swfw_sync(hw, gssr & (fwmask | swmask)); usleep_range(5000, 10000); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } /** @@ -2757,7 +2744,7 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) s32 ret_val; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* * Link must be up to auto-blink the LEDs; @@ -2803,7 +2790,7 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) s32 ret_val; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg); if (ret_val) @@ -2963,7 +2950,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); @@ -3014,7 +3001,7 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { hw_dbg(hw, "RAR index %d is out of range.\n", rar); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } if (vmdq < 32) { @@ -3091,7 +3078,7 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) * will simply bypass the VLVF if there are no entries present in the * VLVF that contain our VLAN */ - first_empty_slot = vlvf_bypass ? IXGBE_ERR_NO_SPACE : 0; + first_empty_slot = vlvf_bypass ? -ENOSPC : 0; /* add VLAN enable bit for comparison */ vlan |= IXGBE_VLVF_VIEN; @@ -3115,7 +3102,7 @@ static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass) if (!first_empty_slot) hw_dbg(hw, "No space in VLVF.\n"); - return first_empty_slot ? : IXGBE_ERR_NO_SPACE; + return first_empty_slot ? : -ENOSPC; } /** @@ -3135,7 +3122,7 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, s32 vlvf_index; if ((vlan > 4095) || (vind > 63)) - return IXGBE_ERR_PARAM; + return -EINVAL; /* * this is a 2 part operation - first the VFTA, then the @@ -3611,7 +3598,8 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) * * Communicates with the manageability block. On success return 0 * else returns semaphore error when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * semaphore, -EINVAL when incorrect parameters passed or -EIO when + * command fails. * * This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held * by the caller. @@ -3624,7 +3612,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EINVAL; } /* Set bit 9 of FWSTS clearing FW reset indication */ @@ -3635,13 +3623,13 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, hicr = IXGBE_READ_REG(hw, IXGBE_HICR); if (!(hicr & IXGBE_HICR_EN)) { hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n"); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; } /* Calculate length in DWORDs. We must be DWORD aligned */ if (length % sizeof(u32)) { hw_dbg(hw, "Buffer length failure, not aligned to dword"); - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; } dword_len = length >> 2; @@ -3666,7 +3654,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, /* Check command successful completion. */ if ((timeout && i == timeout) || !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; return 0; } @@ -3686,7 +3674,7 @@ s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, * in these cases. * * Communicates with the manageability block. On success return 0 - * else return IXGBE_ERR_HOST_INTERFACE_COMMAND. + * else return -EIO or -EINVAL. **/ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, u32 length, u32 timeout, @@ -3701,7 +3689,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EINVAL; } /* Take management host interface semaphore */ status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); @@ -3731,7 +3719,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer, if (length < round_up(buf_len, 4) + hdr_size) { hw_dbg(hw, "Buffer not large enough for reply message.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; + status = -EIO; goto rel_out; } @@ -3762,8 +3750,8 @@ rel_out: * * Sends driver version number to firmware through the manageability * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * else returns -EBUSY when encountering an error acquiring + * semaphore or -EIO when command fails. **/ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, u8 sub, __always_unused u16 len, @@ -3799,7 +3787,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, FW_CEM_RESP_STATUS_SUCCESS) ret_val = 0; else - ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + ret_val = -EIO; break; } @@ -3897,14 +3885,14 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg, return status; if ((*ets_offset == 0x0000) || (*ets_offset == 0xFFFF)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = hw->eeprom.ops.read(hw, *ets_offset, ets_cfg); if (status) return status; if ((*ets_cfg & IXGBE_ETS_TYPE_MASK) != IXGBE_ETS_TYPE_EMC_SHIFTED) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; return 0; } @@ -3927,7 +3915,7 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) /* Only support thermal sensors attached to physical port 0 */ if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset); if (status) @@ -3946,10 +3934,10 @@ s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) if (status) return status; - sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> - IXGBE_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> - IXGBE_ETS_DATA_LOC_SHIFT); + sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK, + ets_sensor); + sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK, + ets_sensor); if (sensor_location != 0) { status = hw->phy.ops.read_i2c_byte(hw, @@ -3987,14 +3975,13 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) /* Only support thermal sensors attached to physical port 0 */ if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset); if (status) return status; - low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >> - IXGBE_ETS_LTHRES_DELTA_SHIFT); + low_thresh_delta = FIELD_GET(IXGBE_ETS_LTHRES_DELTA_MASK, ets_cfg); num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); if (num_sensors > IXGBE_MAX_SENSORS) num_sensors = IXGBE_MAX_SENSORS; @@ -4008,10 +3995,10 @@ s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) ets_offset + 1 + i); continue; } - sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> - IXGBE_ETS_DATA_INDEX_SHIFT); - sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> - IXGBE_ETS_DATA_LOC_SHIFT); + sensor_index = FIELD_GET(IXGBE_ETS_DATA_INDEX_MASK, + ets_sensor); + sensor_location = FIELD_GET(IXGBE_ETS_DATA_LOC_MASK, + ets_sensor); therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK; hw->phy.ops.write_i2c_byte(hw, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index af55f19d5bba..9a63457712c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3372,7 +3372,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + s32 status = -EFAULT; u8 databyte = 0xFF; int i = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 7311bd545acf..18d63c8c2ff4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -670,8 +670,8 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter) int fcoe_i_h = fcoe->offset + ((i + fcreta_size) % fcoe->indices); fcoe_q_h = adapter->rx_ring[fcoe_i_h]->reg_idx; - fcoe_q_h = (fcoe_q_h << IXGBE_FCRETA_ENTRY_HIGH_SHIFT) & - IXGBE_FCRETA_ENTRY_HIGH_MASK; + fcoe_q_h = FIELD_PREP(IXGBE_FCRETA_ENTRY_HIGH_MASK, + fcoe_q_h); } fcoe_i = fcoe->offset + (i % fcoe->indices); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 94bde2cad0f4..bd541527c8c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2756,7 +2756,6 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 eicr = adapter->interrupt_event; - s32 rc; if (test_bit(__IXGBE_DOWN, &adapter->state)) return; @@ -2790,14 +2789,13 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) } /* Check if this is not due to overtemp */ - if (hw->phy.ops.check_overtemp(hw) != IXGBE_ERR_OVERTEMP) + if (!hw->phy.ops.check_overtemp(hw)) return; break; case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: - rc = hw->phy.ops.check_overtemp(hw); - if (rc != IXGBE_ERR_OVERTEMP) + if (!hw->phy.ops.check_overtemp(hw)) return; break; default: @@ -5512,7 +5510,7 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { u32 speed; bool autoneg, link_up = false; - int ret = IXGBE_ERR_LINK_SETUP; + int ret = -EIO; if (hw->mac.ops.check_link) ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); @@ -5983,13 +5981,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) err = hw->mac.ops.init_hw(hw); switch (err) { case 0: - case IXGBE_ERR_SFP_NOT_PRESENT: - case IXGBE_ERR_SFP_NOT_SUPPORTED: + case -ENOENT: + case -EOPNOTSUPP: break; - case IXGBE_ERR_PRIMARY_REQUESTS_PENDING: + case -EALREADY: e_dev_err("primary disable timed out\n"); break; - case IXGBE_ERR_EEPROM_VERSION: + case -EACCES: /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated with " @@ -7829,10 +7827,10 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) adapter->sfp_poll_time = jiffies + IXGBE_SFP_POLL_JIFFIES - 1; err = hw->phy.ops.identify_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (err == -EOPNOTSUPP) goto sfp_out; - if (err == IXGBE_ERR_SFP_NOT_PRESENT) { + if (err == -ENOENT) { /* If no cable is present, then we need to reset * the next time we find a good cable. */ adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET; @@ -7858,7 +7856,7 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) else err = hw->mac.ops.setup_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (err == -EOPNOTSUPP) goto sfp_out; adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; @@ -7867,8 +7865,8 @@ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) sfp_out: clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - if ((err == IXGBE_ERR_SFP_NOT_SUPPORTED) && - (adapter->netdev->reg_state == NETREG_REGISTERED)) { + if (err == -EOPNOTSUPP && + adapter->netdev->reg_state == NETREG_REGISTERED) { e_dev_err("failed to initialize because an unsupported " "SFP+ module type was detected.\n"); e_dev_err("Reload the driver after installing a " @@ -7938,7 +7936,7 @@ static void ixgbe_service_timer(struct timer_list *t) static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 status; + bool overtemp; if (!(adapter->flags2 & IXGBE_FLAG2_PHY_INTERRUPT)) return; @@ -7948,11 +7946,9 @@ static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) if (!hw->phy.ops.handle_lasi) return; - status = hw->phy.ops.handle_lasi(&adapter->hw); - if (status != IXGBE_ERR_OVERTEMP) - return; - - e_crit(drv, "%s\n", ixgbe_overheat_msg); + hw->phy.ops.handle_lasi(&adapter->hw, &overtemp); + if (overtemp) + e_crit(drv, "%s\n", ixgbe_overheat_msg); } static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) @@ -10922,9 +10918,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = hw->mac.ops.reset_hw(hw); hw->phy.reset_if_overtemp = false; ixgbe_set_eee_capable(adapter); - if (err == IXGBE_ERR_SFP_NOT_PRESENT) { + if (err == -ENOENT) { err = 0; - } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + } else if (err == -EOPNOTSUPP) { e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n"); e_dev_err("Reload the driver after installing a supported module.\n"); goto err_sw_init; @@ -11143,7 +11139,7 @@ skip_sriov: /* reset the hardware with the new settings */ err = hw->mac.ops.start_hw(hw); - if (err == IXGBE_ERR_EEPROM_VERSION) { + if (err == -EACCES) { /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated " @@ -11371,7 +11367,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, if ((pf_func & 1) == (pdev->devfn & 1)) { unsigned int device_id; - vf = (req_id & 0x7F) >> 1; + vf = FIELD_GET(0x7F, req_id); e_dev_err("VF %d has caused a PCIe error\n", vf); e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " "%8.8x\tdw3: %8.8x\n", diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c index 5679293e53f7..fe7ef5773369 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c @@ -24,7 +24,7 @@ s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) size = mbx->size; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->read(hw, msg, size, mbx_id); } @@ -43,10 +43,10 @@ s32 ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (size > mbx->size) - return IXGBE_ERR_MBX; + return -EINVAL; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->write(hw, msg, size, mbx_id); } @@ -63,7 +63,7 @@ s32 ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_msg(hw, mbx_id); } @@ -80,7 +80,7 @@ s32 ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_ack(hw, mbx_id); } @@ -97,7 +97,7 @@ s32 ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id) struct ixgbe_mbx_info *mbx = &hw->mbx; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; return mbx->ops->check_for_rst(hw, mbx_id); } @@ -115,12 +115,12 @@ static s32 ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id) int countdown = mbx->timeout; if (!countdown || !mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; while (mbx->ops->check_for_msg(hw, mbx_id)) { countdown--; if (!countdown) - return IXGBE_ERR_MBX; + return -EIO; udelay(mbx->usec_delay); } @@ -140,12 +140,12 @@ static s32 ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id) int countdown = mbx->timeout; if (!countdown || !mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; while (mbx->ops->check_for_ack(hw, mbx_id)) { countdown--; if (!countdown) - return IXGBE_ERR_MBX; + return -EIO; udelay(mbx->usec_delay); } @@ -169,7 +169,7 @@ static s32 ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, s32 ret_val; if (!mbx->ops) - return IXGBE_ERR_MBX; + return -EIO; ret_val = ixgbe_poll_for_msg(hw, mbx_id); if (ret_val) @@ -197,7 +197,7 @@ static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, /* exit if either we can't write or there isn't a defined timeout */ if (!mbx->ops || !mbx->timeout) - return IXGBE_ERR_MBX; + return -EIO; /* send msg */ ret_val = mbx->ops->write(hw, msg, size, mbx_id); @@ -217,7 +217,7 @@ static s32 ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -238,7 +238,7 @@ static s32 ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -259,7 +259,7 @@ static s32 ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -295,7 +295,7 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number) return 0; } - return IXGBE_ERR_MBX; + return -EIO; } /** @@ -317,7 +317,7 @@ static s32 ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number) if (p2v_mailbox & IXGBE_PFMAILBOX_PFU) return 0; - return IXGBE_ERR_MBX; + return -EIO; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index 8f4316b19278..6434c190e7a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -7,7 +7,6 @@ #include "ixgbe_type.h" #define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ -#define IXGBE_ERR_MBX -100 #define IXGBE_VFMAILBOX 0x002FC #define IXGBE_VFMBMEM 0x00200 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 689470c1e8ad..f28140a05f09 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -102,7 +102,7 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, csum = ~csum; do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); /* Device Address and write indication */ if (ixgbe_out_i2c_byte_ack(hw, addr)) @@ -150,7 +150,7 @@ fail: hw_dbg(hw, "I2C byte read combined error.\n"); } while (retry < max_retry); - return IXGBE_ERR_I2C; + return -EIO; } /** @@ -179,7 +179,7 @@ s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, csum = ~csum; do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); /* Device Address and write indication */ if (ixgbe_out_i2c_byte_ack(hw, addr)) @@ -215,7 +215,7 @@ fail: hw_dbg(hw, "I2C byte write combined error.\n"); } while (retry < max_retry); - return IXGBE_ERR_I2C; + return -EIO; } /** @@ -262,8 +262,8 @@ static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) **/ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { + u32 status = -EFAULT; u32 phy_addr; - u32 status = IXGBE_ERR_PHY_ADDR_INVALID; if (!hw->phy.phy_semaphore_mask) { if (hw->bus.lan_id) @@ -276,13 +276,12 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) return 0; if (hw->phy.nw_mng_if_sel) { - phy_addr = (hw->phy.nw_mng_if_sel & - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + phy_addr = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD, + hw->phy.nw_mng_if_sel); if (ixgbe_probe_phy(hw, phy_addr)) return 0; else - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; } for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { @@ -408,8 +407,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) return status; /* Don't reset PHY if it's shut down due to overtemp. */ - if (!hw->phy.reset_if_overtemp && - (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw))) + if (!hw->phy.reset_if_overtemp && hw->phy.ops.check_overtemp(hw)) return 0; /* Blocked by MNG FW so bail */ @@ -457,7 +455,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) if (ctrl & MDIO_CTRL1_RESET) { hw_dbg(hw, "PHY reset polling failed to complete.\n"); - return IXGBE_ERR_RESET_FAILED; + return -EIO; } return 0; @@ -500,7 +498,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY address command did not complete.\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Address cycle complete, setup and write the read @@ -527,7 +525,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY read command didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Read operation is complete. Get the data @@ -559,7 +557,7 @@ s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } return status; @@ -604,7 +602,7 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY address cmd didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* @@ -632,7 +630,7 @@ s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { hw_dbg(hw, "PHY write cmd didn't complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -657,7 +655,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } return status; @@ -1430,7 +1428,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) if ((phy_data & MDIO_CTRL1_RESET) != 0) { hw_dbg(hw, "PHY reset did not complete.\n"); - return IXGBE_ERR_PHY; + return -EIO; } /* Get init offsets */ @@ -1448,8 +1446,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); if (ret_val) goto err_eeprom; - control = (eword & IXGBE_CONTROL_MASK_NL) >> - IXGBE_CONTROL_SHIFT_NL; + control = FIELD_GET(IXGBE_CONTROL_MASK_NL, eword); edata = eword & IXGBE_DATA_MASK_NL; switch (control) { case IXGBE_DELAY_NL: @@ -1487,12 +1484,12 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) hw_dbg(hw, "SOL\n"); } else { hw_dbg(hw, "Bad control value\n"); - return IXGBE_ERR_PHY; + return -EIO; } break; default: hw_dbg(hw, "Bad control type\n"); - return IXGBE_ERR_PHY; + return -EIO; } } @@ -1500,7 +1497,7 @@ s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) err_eeprom: hw_err(hw, "eeprom read at offset %d failed\n", data_offset); - return IXGBE_ERR_PHY; + return -EIO; } /** @@ -1518,10 +1515,10 @@ s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw) return ixgbe_identify_qsfp_module_generic(hw); default: hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1546,7 +1543,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /* LAN ID is needed for sfp_type determination */ @@ -1561,7 +1558,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_1GBE_COMP_CODES, @@ -1752,7 +1749,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } /* Anything else 82598-based is supported */ @@ -1776,7 +1773,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) } hw_dbg(hw, "SFP+ module not supported\n"); hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; @@ -1786,7 +1783,7 @@ err_read_i2c_eeprom: hw->phy.id = 0; hw->phy.type = ixgbe_phy_unknown; } - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1813,7 +1810,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { hw->phy.sfp_type = ixgbe_sfp_type_not_present; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /* LAN ID is needed for sfp_type determination */ @@ -1827,7 +1824,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) if (identifier != IXGBE_SFF_IDENTIFIER_QSFP_PLUS) { hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } hw->phy.id = identifier; @@ -1895,7 +1892,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) } else { /* unsupported module type */ hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } } @@ -1955,7 +1952,7 @@ static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) } hw_dbg(hw, "QSFP module not supported\n"); hw->phy.type = ixgbe_phy_sfp_unsupported; - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; } @@ -1966,7 +1963,7 @@ err_read_i2c_eeprom: hw->phy.id = 0; hw->phy.type = ixgbe_phy_unknown; - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; } /** @@ -1986,14 +1983,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 sfp_type = hw->phy.sfp_type; if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) && (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; /* * Limiting active cables and 1G Phys must be initialized as @@ -2014,11 +2011,11 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset)) { hw_err(hw, "eeprom read at %d failed\n", IXGBE_PHY_INIT_OFFSET_NL); - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + return -EIO; } if ((!*list_offset) || (*list_offset == 0xFFFF)) - return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + return -EIO; /* Shift offset to first ID word */ (*list_offset)++; @@ -2037,7 +2034,7 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, goto err_phy; if ((!*data_offset) || (*data_offset == 0xFFFF)) { hw_dbg(hw, "SFP+ module not supported\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } else { break; } @@ -2050,14 +2047,14 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (sfp_id == IXGBE_PHY_INIT_END_NL) { hw_dbg(hw, "No matching SFP+ module found\n"); - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; err_phy: hw_err(hw, "eeprom read at offset %d failed\n", *list_offset); - return IXGBE_ERR_PHY; + return -EIO; } /** @@ -2152,7 +2149,7 @@ static s32 ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, do { if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; ixgbe_i2c_start(hw); @@ -2268,7 +2265,7 @@ static s32 ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset, u32 swfw_mask = hw->phy.phy_semaphore_mask; if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; do { ixgbe_i2c_start(hw); @@ -2510,7 +2507,7 @@ static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw) if (ack == 1) { hw_dbg(hw, "I2C ack was not received.\n"); - status = IXGBE_ERR_I2C; + status = -EIO; } ixgbe_lower_i2c_clk(hw, &i2cctl); @@ -2582,7 +2579,7 @@ static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data) udelay(IXGBE_I2C_T_LOW); } else { hw_dbg(hw, "I2C data was not set to %X\n", data); - return IXGBE_ERR_I2C; + return -EIO; } return 0; @@ -2678,7 +2675,7 @@ static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw)); if (data != ixgbe_get_i2c_data(hw, i2cctl)) { hw_dbg(hw, "Error - I2C data was not set to %X.\n", data); - return IXGBE_ERR_I2C; + return -EIO; } return 0; @@ -2748,22 +2745,24 @@ static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Checks if the LASI temp alarm status was triggered due to overtemp + * + * Return true when an overtemp event detected, otherwise false. **/ -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) +bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) { u16 phy_data = 0; + u32 status; if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) - return 0; + return false; /* Check that the LASI temp alarm status was triggered */ - hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, - MDIO_MMD_PMAPMD, &phy_data); - - if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) - return 0; + status = hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, + MDIO_MMD_PMAPMD, &phy_data); + if (status) + return false; - return IXGBE_ERR_OVERTEMP; + return !!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM); } /** ixgbe_set_copper_phy_power - Control power for copper phy diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 6544c4539c0d..ef72729d7c93 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -155,7 +155,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *list_offset, u16 *data_offset); -s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); +bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data); s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 9cfdfa8a4355..7299a830f6e4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -363,8 +363,7 @@ int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs) static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) { - int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) - >> IXGBE_VT_MSGINFO_SHIFT; + int entries = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); u16 *hash_list = (u16 *)&msgbuf[1]; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; struct ixgbe_hw *hw = &adapter->hw; @@ -969,7 +968,7 @@ static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter, static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) { - u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; + u32 add = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); u8 tcs = adapter->hw_tcs; @@ -992,8 +991,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) { u8 *new_mac = ((u8 *)(&msgbuf[1])); - int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> - IXGBE_VT_MSGINFO_SHIFT; + int index = FIELD_GET(IXGBE_VT_MSGINFO_MASK, msgbuf[0]); int err; if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted && @@ -1327,7 +1325,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); - retval = IXGBE_ERR_MBX; + retval = -EIO; break; } @@ -1849,5 +1847,6 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev, ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled; ivi->rss_query_en = adapter->vfinfo[vf].rss_query_enabled; ivi->trusted = adapter->vfinfo[vf].trusted; + ivi->linkstate = adapter->vfinfo[vf].link_state; return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2b00db92b08f..61b9774b3d31 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3509,10 +3509,10 @@ struct ixgbe_phy_operations { s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *); s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); - s32 (*check_overtemp)(struct ixgbe_hw *); + bool (*check_overtemp)(struct ixgbe_hw *); s32 (*set_phy_power)(struct ixgbe_hw *, bool on); s32 (*enter_lplu)(struct ixgbe_hw *); - s32 (*handle_lasi)(struct ixgbe_hw *hw); + s32 (*handle_lasi)(struct ixgbe_hw *hw, bool *); s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, u8 *value); s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, @@ -3665,45 +3665,6 @@ struct ixgbe_info { const u32 *mvals; }; - -/* Error Codes */ -#define IXGBE_ERR_EEPROM -1 -#define IXGBE_ERR_EEPROM_CHECKSUM -2 -#define IXGBE_ERR_PHY -3 -#define IXGBE_ERR_CONFIG -4 -#define IXGBE_ERR_PARAM -5 -#define IXGBE_ERR_MAC_TYPE -6 -#define IXGBE_ERR_UNKNOWN_PHY -7 -#define IXGBE_ERR_LINK_SETUP -8 -#define IXGBE_ERR_ADAPTER_STOPPED -9 -#define IXGBE_ERR_INVALID_MAC_ADDR -10 -#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 -#define IXGBE_ERR_PRIMARY_REQUESTS_PENDING -12 -#define IXGBE_ERR_INVALID_LINK_SETTINGS -13 -#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 -#define IXGBE_ERR_RESET_FAILED -15 -#define IXGBE_ERR_SWFW_SYNC -16 -#define IXGBE_ERR_PHY_ADDR_INVALID -17 -#define IXGBE_ERR_I2C -18 -#define IXGBE_ERR_SFP_NOT_SUPPORTED -19 -#define IXGBE_ERR_SFP_NOT_PRESENT -20 -#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 -#define IXGBE_ERR_NO_SAN_ADDR_PTR -22 -#define IXGBE_ERR_FDIR_REINIT_FAILED -23 -#define IXGBE_ERR_EEPROM_VERSION -24 -#define IXGBE_ERR_NO_SPACE -25 -#define IXGBE_ERR_OVERTEMP -26 -#define IXGBE_ERR_FC_NOT_NEGOTIATED -27 -#define IXGBE_ERR_FC_NOT_SUPPORTED -28 -#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30 -#define IXGBE_ERR_PBA_SECTION -31 -#define IXGBE_ERR_INVALID_ARGUMENT -32 -#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33 -#define IXGBE_ERR_FDIR_CMD_INCOMPLETE -38 -#define IXGBE_ERR_FW_RESP_INVALID -39 -#define IXGBE_ERR_TOKEN_RETRY -40 -#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF - #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ BIT(5) #define IXGBE_FUSES0_REV_MASK (3u << 6) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index d5cfb51ff648..57a912e4653f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -84,7 +84,7 @@ mac_reset_top: status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); if (status) { hw_dbg(hw, "semaphore failed with %d", status); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ctrl = IXGBE_CTRL_RST; @@ -103,7 +103,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } msleep(100); @@ -187,16 +187,16 @@ s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw) s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) { struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - u32 eec; - u16 eeprom_size; if (eeprom->type == ixgbe_eeprom_uninitialized) { + u16 eeprom_size; + u32 eec; + eeprom->semaphore_delay = 10; eeprom->type = ixgbe_flash; eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); - eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> - IXGBE_EEC_SIZE_SHIFT); + eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); eeprom->word_size = BIT(eeprom_size + IXGBE_EEPROM_WORD_SIZE_SHIFT); @@ -220,7 +220,7 @@ static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data) s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_read_eerd_generic(hw, offset, data); @@ -243,7 +243,7 @@ static s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_read_eerd_buffer_generic(hw, offset, words, data); @@ -264,7 +264,7 @@ static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data) s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_eewr_generic(hw, offset, data); @@ -287,7 +287,7 @@ static s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_eewr_buffer_generic(hw, offset, words, data); @@ -324,7 +324,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) for (i = 0; i < checksum_last_word; i++) { if (ixgbe_read_eerd_generic(hw, i, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -349,7 +349,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) if (ixgbe_read_eerd_generic(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Skip pointer section if length is invalid. */ @@ -360,7 +360,7 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) for (j = pointer + 1; j <= pointer + length; j++) { if (ixgbe_read_eerd_generic(hw, j, &word)) { hw_dbg(hw, "EEPROM read failed\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } checksum += word; } @@ -397,7 +397,7 @@ static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, } if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->eeprom.ops.calc_checksum(hw); if (status < 0) @@ -418,7 +418,7 @@ static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, */ if (read_checksum != checksum) { hw_dbg(hw, "Invalid EEPROM checksum"); - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; } /* If the user cares, return the calculated checksum */ @@ -455,7 +455,7 @@ static s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw) } if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->eeprom.ops.calc_checksum(hw); if (status < 0) @@ -490,7 +490,7 @@ static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw) s32 status; status = ixgbe_poll_flash_update_done_X540(hw); - if (status == IXGBE_ERR_EEPROM) { + if (status == -EIO) { hw_dbg(hw, "Flash update time out\n"); return status; } @@ -540,7 +540,7 @@ static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw) return 0; udelay(5); } - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -575,7 +575,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * SW_FW_SYNC bits (not just NVM) */ if (ixgbe_get_swfw_sync_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw)); if (!(swfw_sync & (fwmask | swmask | hwmask))) { @@ -599,7 +599,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * bits in the SW_FW_SYNC register. */ if (ixgbe_get_swfw_sync_semaphore(hw)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw)); if (swfw_sync & (fwmask | hwmask)) { swfw_sync |= swmask; @@ -622,11 +622,11 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) rmask |= IXGBE_GSSR_I2C_MASK; ixgbe_release_swfw_sync_X540(hw, rmask); ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } /** @@ -680,7 +680,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) if (i == timeout) { hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n"); - return IXGBE_ERR_EEPROM; + return -EIO; } /* Now get the semaphore between SW/FW through the REGSMP bit */ @@ -697,7 +697,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) */ hw_dbg(hw, "REGSMP Software NVM semaphore not granted\n"); ixgbe_release_swfw_sync_semaphore(hw); - return IXGBE_ERR_EEPROM; + return -EIO; } /** @@ -768,7 +768,7 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) bool link_up; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Link should be up in order for the blink bit in the LED control * register to work. Force link and speed in the MAC if link is down. @@ -804,7 +804,7 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) u32 ledctl_reg; if (index > 3) - return IXGBE_ERR_PARAM; + return -EINVAL; /* Restore the LED to its default value. */ ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index aa4bf6c9a2f7..6208923e29a2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -206,13 +206,13 @@ static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw) } if (retry == IXGBE_CS4227_RETRIES) { hw_err(hw, "CS4227 reset did not complete\n"); - return IXGBE_ERR_PHY; + return -EIO; } status = ixgbe_read_cs4227(hw, IXGBE_CS4227_EEPROM_STATUS, &value); if (status || !(value & IXGBE_CS4227_EEPROM_LOAD_OK)) { hw_err(hw, "CS4227 EEPROM did not load successfully\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -350,13 +350,13 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) static s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 *phy_data) { - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; } static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 phy_data) { - return IXGBE_NOT_IMPLEMENTED; + return -EOPNOTSUPP; } /** @@ -463,7 +463,7 @@ s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity, --retries; } while (retries > 0); - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; } static const struct { @@ -511,7 +511,7 @@ static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw) hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK; hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK; if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK) - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; hw->phy.autoneg_advertised = hw->phy.speeds_supported; hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL | @@ -568,7 +568,7 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } switch (hw->fc.requested_mode) { @@ -600,8 +600,10 @@ static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup); if (rc) return rc; + if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN) - return IXGBE_ERR_OVERTEMP; + return -EIO; + return 0; } @@ -628,16 +630,16 @@ static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw) static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) { struct ixgbe_eeprom_info *eeprom = &hw->eeprom; - u32 eec; - u16 eeprom_size; if (eeprom->type == ixgbe_eeprom_uninitialized) { + u16 eeprom_size; + u32 eec; + eeprom->semaphore_delay = 10; eeprom->type = ixgbe_flash; eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw)); - eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> - IXGBE_EEC_SIZE_SHIFT); + eeprom_size = FIELD_GET(IXGBE_EEC_SIZE, eec); eeprom->word_size = BIT(eeprom_size + IXGBE_EEPROM_WORD_SIZE_SHIFT); @@ -675,7 +677,7 @@ static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) *ctrl = command; if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { hw_dbg(hw, "IOSF wait timed out\n"); - return IXGBE_ERR_PHY; + return -EIO; } return 0; @@ -712,10 +714,9 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, ret = ixgbe_iosf_wait(hw, &command); if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { - error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> - IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; + error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); hw_dbg(hw, "Failed to read, error %x\n", error); - return IXGBE_ERR_PHY; + return -EIO; } if (!ret) @@ -750,9 +751,9 @@ static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw) if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return 0; if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) - return IXGBE_ERR_FW_RESP_INVALID; + return -EIO; - return IXGBE_ERR_TOKEN_RETRY; + return -EAGAIN; } /** @@ -778,7 +779,7 @@ static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw) return status; if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return 0; - return IXGBE_ERR_FW_RESP_INVALID; + return -EIO; } /** @@ -942,7 +943,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr, local_buffer = buf; } else { if (buffer_size < ptr) - return IXGBE_ERR_PARAM; + return -EINVAL; local_buffer = &buffer[ptr]; } @@ -960,7 +961,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr, } if (buffer && ((u32)start + (u32)length > buffer_size)) - return IXGBE_ERR_PARAM; + return -EINVAL; for (i = start; length; i++, length--) { if (i == bufsz && !buffer) { @@ -1012,7 +1013,7 @@ static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer, local_buffer = eeprom_ptrs; } else { if (buffer_size < IXGBE_EEPROM_LAST_WORD) - return IXGBE_ERR_PARAM; + return -EINVAL; local_buffer = buffer; } @@ -1148,7 +1149,7 @@ static s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw, * calculated checksum */ if (read_checksum != checksum) { - status = IXGBE_ERR_EEPROM_CHECKSUM; + status = -EIO; hw_dbg(hw, "Invalid EEPROM checksum"); } @@ -1203,7 +1204,7 @@ static s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data) hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); } else { hw_dbg(hw, "write ee hostif failed to get semaphore"); - status = IXGBE_ERR_SWFW_SYNC; + status = -EBUSY; } return status; @@ -1412,10 +1413,9 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, ret = ixgbe_iosf_wait(hw, &command); if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { - error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> - IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT; + error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); hw_dbg(hw, "Failed to write, error %x\n", error); - return IXGBE_ERR_PHY; + return -EIO; } out: @@ -1558,7 +1558,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) /* iXFI is only supported with X552 */ if (mac->type != ixgbe_mac_X550EM_x) - return IXGBE_ERR_LINK_SETUP; + return -EIO; /* Disable AN and force speed to 10G Serial. */ status = ixgbe_read_iosf_sb_reg_x550(hw, @@ -1580,7 +1580,7 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) break; default: /* Other link speeds are not supported by internal KR PHY. */ - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; } status = ixgbe_write_iosf_sb_reg_x550(hw, @@ -1611,7 +1611,7 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) { switch (hw->phy.sfp_type) { case ixgbe_sfp_type_not_present: - return IXGBE_ERR_SFP_NOT_PRESENT; + return -ENOENT; case ixgbe_sfp_type_da_cu_core0: case ixgbe_sfp_type_da_cu_core1: *linear = true; @@ -1630,7 +1630,7 @@ static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) case ixgbe_sfp_type_1g_cu_core0: case ixgbe_sfp_type_1g_cu_core1: default: - return IXGBE_ERR_SFP_NOT_SUPPORTED; + return -EOPNOTSUPP; } return 0; @@ -1660,7 +1660,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, * there is no reason to configure CS4227 and SFP not present error is * not accepted in the setup MAC link flow. */ - if (status == IXGBE_ERR_SFP_NOT_PRESENT) + if (status == -ENOENT) return 0; if (status) @@ -1718,7 +1718,7 @@ static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) break; default: /* Other link speeds are not supported by internal PHY. */ - return IXGBE_ERR_LINK_SETUP; + return -EINVAL; } (void)mac->ops.write_iosf_sb_reg(hw, @@ -1803,7 +1803,7 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == -ENOENT) return 0; if (ret_val) @@ -1853,7 +1853,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == -ENOENT) return 0; if (ret_val) @@ -1863,7 +1863,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, ixgbe_setup_kr_speed_x550em(hw, speed); if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) - return IXGBE_ERR_PHY_ADDR_INVALID; + return -EFAULT; /* Get external PHY SKU id */ ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU, @@ -1962,7 +1962,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, u16 i, autoneg_status; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) - return IXGBE_ERR_CONFIG; + return -EIO; status = ixgbe_check_mac_link_generic(hw, speed, link_up, link_up_wait_to_complete); @@ -2145,9 +2145,9 @@ static s32 ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed, */ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) { - s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; ixgbe_link_speed speed; + s32 status = -EIO; bool link_up; /* AN should have completed when the cable was plugged in. @@ -2165,7 +2165,7 @@ static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) /* Check if auto-negotiation has completed */ status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info); if (status || !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) { - status = IXGBE_ERR_FC_NOT_NEGOTIATED; + status = -EIO; goto out; } @@ -2369,18 +2369,18 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, * @hw: pointer to hardware structure * @lsc: pointer to boolean flag which indicates whether external Base T * PHY interrupt is lsc + * @is_overtemp: indicate whether an overtemp event encountered * * Determime if external Base T PHY interrupt cause is high temperature * failure alarm or link status change. - * - * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature - * failure alarm, else return PHY access status. **/ -static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) +static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc, + bool *is_overtemp) { u32 status; u16 reg; + *is_overtemp = false; *lsc = false; /* Vendor alarm triggered */ @@ -2412,7 +2412,8 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg & IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL) { /* power down the PHY in case the PHY FW didn't already */ ixgbe_set_copper_phy_power(hw, false); - return IXGBE_ERR_OVERTEMP; + *is_overtemp = true; + return -EIO; } if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { /* device fault alarm triggered */ @@ -2426,7 +2427,8 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { /* power down the PHY in case the PHY FW didn't */ ixgbe_set_copper_phy_power(hw, false); - return IXGBE_ERR_OVERTEMP; + *is_overtemp = true; + return -EIO; } } @@ -2462,12 +2464,12 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) **/ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) { + bool lsc, overtemp; u32 status; u16 reg; - bool lsc; /* Clear interrupt flags */ - status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); + status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, &overtemp); /* Enable link status change alarm */ @@ -2546,21 +2548,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) /** * ixgbe_handle_lasi_ext_t_x550em - Handle external Base T PHY interrupt * @hw: pointer to hardware structure + * @is_overtemp: indicate whether an overtemp event encountered * * Handle external Base T PHY interrupt. If high temperature * failure alarm then return error, else if link status change * then setup internal/external PHY link - * - * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature - * failure alarm, else return PHY access status. **/ -static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw) +static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw, + bool *is_overtemp) { struct ixgbe_phy_info *phy = &hw->phy; bool lsc; u32 status; - status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); + status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, is_overtemp); if (status) return status; @@ -2692,7 +2693,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) u16 speed; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) - return IXGBE_ERR_CONFIG; + return -EIO; if (!(hw->mac.type == ixgbe_mac_X550EM_x && !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) { @@ -2735,7 +2736,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) break; default: /* Internal PHY does not support anything else */ - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } return ixgbe_setup_ixfi_x550em(hw, &force_speed); @@ -2767,7 +2768,7 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) u16 phy_data; if (led_idx >= IXGBE_X557_MAX_LED_INDEX) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, @@ -2789,7 +2790,7 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) u16 phy_data; if (led_idx >= IXGBE_X557_MAX_LED_INDEX) - return IXGBE_ERR_PARAM; + return -EINVAL; /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, @@ -2813,8 +2814,9 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * * Sends driver version number to firmware through the manageability * block. On success return 0 - * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring - * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * else returns -EBUSY when encountering an error acquiring + * semaphore, -EIO when command fails or -ENIVAL when incorrect + * params passed. **/ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, u8 sub, u16 len, @@ -2825,7 +2827,7 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, int i; if (!len || !driver_ver || (len > sizeof(fw_cmd.driver_string))) - return IXGBE_ERR_INVALID_ARGUMENT; + return -EINVAL; fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len; @@ -2850,7 +2852,7 @@ static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, if (fw_cmd.hdr.cmd_or_resp.ret_status != FW_CEM_RESP_STATUS_SUCCESS) - return IXGBE_ERR_HOST_INTERFACE_COMMAND; + return -EIO; return 0; } @@ -2907,7 +2909,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } /* 10gig parts do not have a word in the EEPROM to determine the @@ -2942,7 +2944,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) break; default: hw_err(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } switch (hw->device_id) { @@ -2986,8 +2988,8 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) { u32 link_s1, lp_an_page_low, an_cntl_1; - s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; ixgbe_link_speed speed; + s32 status = -EIO; bool link_up; /* AN should have completed when the cable was plugged in. @@ -3013,7 +3015,7 @@ static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { hw_dbg(hw, "Auto-Negotiation did not complete\n"); - status = IXGBE_ERR_FC_NOT_NEGOTIATED; + status = -EIO; goto out; } @@ -3187,21 +3189,23 @@ static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw) /** * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp * @hw: pointer to hardware structure + * + * Return true when an overtemp event detected, otherwise false. */ -static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) +static bool ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) { u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store); if (rc) - return rc; + return false; if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) { ixgbe_shutdown_fw_phy(hw); - return IXGBE_ERR_OVERTEMP; + return true; } - return 0; + return false; } /** @@ -3222,9 +3226,8 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) */ if (hw->mac.type == ixgbe_mac_x550em_a && hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { - hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + hw->phy.mdio.prtad = FIELD_GET(IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD, + hw->phy.nw_mng_if_sel); } } @@ -3251,8 +3254,7 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); - if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || - ret_val == IXGBE_ERR_PHY_ADDR_INVALID) + if (ret_val == -EOPNOTSUPP || ret_val == -EFAULT) return ret_val; /* Setup function pointers based on detected hardware */ @@ -3460,8 +3462,7 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || - status == IXGBE_ERR_PHY_ADDR_INVALID) + if (status == -EOPNOTSUPP || status == -EFAULT) return status; /* start the external PHY */ @@ -3477,7 +3478,7 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status == -EOPNOTSUPP) return status; /* Reset PHY */ @@ -3501,7 +3502,7 @@ mac_reset_top: status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); if (status) { hw_dbg(hw, "semaphore failed with %d", status); - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; } ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); @@ -3519,7 +3520,7 @@ mac_reset_top: } if (ctrl & IXGBE_CTRL_RST_MASK) { - status = IXGBE_ERR_RESET_FAILED; + status = -EIO; hw_dbg(hw, "Reset polling failed to complete.\n"); } @@ -3615,7 +3616,7 @@ static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); - return IXGBE_ERR_INVALID_LINK_SETTINGS; + return -EINVAL; } if (hw->fc.requested_mode == ixgbe_fc_default) @@ -3672,7 +3673,7 @@ static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) break; default: hw_err(hw, "Flow control param set incorrectly\n"); - return IXGBE_ERR_CONFIG; + return -EIO; } status = hw->mac.ops.write_iosf_sb_reg(hw, @@ -3768,7 +3769,7 @@ static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask) return 0; if (hmask) ixgbe_release_swfw_sync_X540(hw, hmask); - if (status != IXGBE_ERR_TOKEN_RETRY) + if (status != -EAGAIN) return status; msleep(FW_PHY_TOKEN_DELAY); } @@ -3812,7 +3813,7 @@ static s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = hw->phy.ops.read_reg_mdi(hw, reg_addr, device_type, phy_data); @@ -3838,7 +3839,7 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, s32 status; if (hw->mac.ops.acquire_swfw_sync(hw, mask)) - return IXGBE_ERR_SWFW_SYNC; + return -EBUSY; status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, mask); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 1ca273f17d29..820b1fabe297 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6877,7 +6877,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->min_mtu = ETH_MIN_MTU; /* 9704 == 9728 - 20 and rounding to 8 */ dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE; - dev->dev.of_node = port_node; + device_set_node(&dev->dev, port_fwnode); port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; port->pcs_gmac.neg_mode = true; diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile index 02a4a21bc298..62162ed63f34 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/Makefile +++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEON_EP) += octeon_ep.o octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \ octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o \ - octep_cnxk_pf.o + octep_pfvf_mbox.o octep_cnxk_pf.o diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c index 9209f1ec1b52..b5805969404f 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c @@ -362,16 +362,55 @@ static void octep_setup_mbox_regs_cn93_pf(struct octep_device *oct, int q_no) { struct octep_mbox *mbox = oct->mbox[q_no]; - mbox->q_no = q_no; - - /* PF mbox interrupt reg */ - mbox->mbox_int_reg = oct->mmio[0].hw_addr + CN93_SDP_EPF_MBOX_RINT(0); - /* PF to VF DATA reg. PF writes into this reg */ - mbox->mbox_write_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_PF_VF_DATA(q_no); + mbox->pf_vf_data_reg = oct->mmio[0].hw_addr + CN93_SDP_MBOX_PF_VF_DATA(q_no); /* VF to PF DATA reg. PF reads from this reg */ - mbox->mbox_read_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_VF_PF_DATA(q_no); + mbox->vf_pf_data_reg = oct->mmio[0].hw_addr + CN93_SDP_MBOX_VF_PF_DATA(q_no); +} + +/* Poll for mailbox messages from VF */ +static void octep_poll_pfvf_mailbox(struct octep_device *oct) +{ + u32 vf, active_vfs, active_rings_per_vf, vf_mbox_queue; + u64 reg0, reg1; + + reg0 = octep_read_csr64(oct, CN93_SDP_EPF_MBOX_RINT(0)); + reg1 = octep_read_csr64(oct, CN93_SDP_EPF_MBOX_RINT(1)); + if (reg0 || reg1) { + active_vfs = CFG_GET_ACTIVE_VFS(oct->conf); + active_rings_per_vf = CFG_GET_ACTIVE_RPVF(oct->conf); + for (vf = 0; vf < active_vfs; vf++) { + vf_mbox_queue = vf * active_rings_per_vf; + + if (vf_mbox_queue < 64) { + if (!(reg0 & (0x1UL << vf_mbox_queue))) + continue; + } else { + if (!(reg1 & (0x1UL << (vf_mbox_queue - 64)))) + continue; + } + + if (!oct->mbox[vf_mbox_queue]) { + dev_err(&oct->pdev->dev, "bad mbox vf %d\n", vf); + continue; + } + schedule_work(&oct->mbox[vf_mbox_queue]->wk.work); + } + if (reg0) + octep_write_csr64(oct, CN93_SDP_EPF_MBOX_RINT(0), reg0); + if (reg1) + octep_write_csr64(oct, CN93_SDP_EPF_MBOX_RINT(1), reg1); + } +} + +/* PF-VF mailbox interrupt handler */ +static irqreturn_t octep_pfvf_mbox_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + + octep_poll_pfvf_mailbox(oct); + return IRQ_HANDLED; } /* Poll OEI events like heartbeat */ @@ -403,6 +442,7 @@ static irqreturn_t octep_oei_intr_handler_cn93_pf(void *dev) */ static void octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct) { + octep_poll_pfvf_mailbox(oct); octep_poll_oei_cn93_pf(oct); } @@ -646,6 +686,8 @@ static void octep_enable_interrupts_cn93_pf(struct octep_device *oct) octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_MBOX_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_MBOX_RINT_ENA_W1S(1), -1ULL); octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL); octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL); @@ -672,6 +714,8 @@ static void octep_disable_interrupts_cn93_pf(struct octep_device *oct) octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_MBOX_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_MBOX_RINT_ENA_W1C(1), -1ULL); octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL); octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL); @@ -807,6 +851,7 @@ void octep_device_setup_cn93_pf(struct octep_device *oct) oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cn93_pf; oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cn93_pf; + oct->hw_ops.mbox_intr_handler = octep_pfvf_mbox_intr_handler_cn93_pf; oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cn93_pf; oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cn93_pf; oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cn93_pf; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c index 098a0c5c4d1c..5de0b5ecbc5f 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c @@ -392,16 +392,44 @@ static void octep_setup_mbox_regs_cnxk_pf(struct octep_device *oct, int q_no) { struct octep_mbox *mbox = oct->mbox[q_no]; - mbox->q_no = q_no; - - /* PF mbox interrupt reg */ - mbox->mbox_int_reg = oct->mmio[0].hw_addr + CNXK_SDP_EPF_MBOX_RINT(0); - /* PF to VF DATA reg. PF writes into this reg */ - mbox->mbox_write_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_PF_VF_DATA(q_no); + mbox->pf_vf_data_reg = oct->mmio[0].hw_addr + CNXK_SDP_MBOX_PF_VF_DATA(q_no); /* VF to PF DATA reg. PF reads from this reg */ - mbox->mbox_read_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_VF_PF_DATA(q_no); + mbox->vf_pf_data_reg = oct->mmio[0].hw_addr + CNXK_SDP_MBOX_VF_PF_DATA(q_no); +} + +static void octep_poll_pfvf_mailbox_cnxk_pf(struct octep_device *oct) +{ + u32 vf, active_vfs, active_rings_per_vf, vf_mbox_queue; + u64 reg0; + + reg0 = octep_read_csr64(oct, CNXK_SDP_EPF_MBOX_RINT(0)); + if (reg0) { + active_vfs = CFG_GET_ACTIVE_VFS(oct->conf); + active_rings_per_vf = CFG_GET_ACTIVE_RPVF(oct->conf); + for (vf = 0; vf < active_vfs; vf++) { + vf_mbox_queue = vf * active_rings_per_vf; + if (!(reg0 & (0x1UL << vf_mbox_queue))) + continue; + + if (!oct->mbox[vf_mbox_queue]) { + dev_err(&oct->pdev->dev, "bad mbox vf %d\n", vf); + continue; + } + schedule_work(&oct->mbox[vf_mbox_queue]->wk.work); + } + if (reg0) + octep_write_csr64(oct, CNXK_SDP_EPF_MBOX_RINT(0), reg0); + } +} + +static irqreturn_t octep_pfvf_mbox_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + + octep_poll_pfvf_mailbox_cnxk_pf(oct); + return IRQ_HANDLED; } /* Poll OEI events like heartbeat */ @@ -435,6 +463,7 @@ static irqreturn_t octep_oei_intr_handler_cnxk_pf(void *dev) */ static void octep_poll_non_ioq_interrupts_cnxk_pf(struct octep_device *oct) { + octep_poll_pfvf_mailbox_cnxk_pf(oct); octep_poll_oei_cnxk_pf(oct); } @@ -682,6 +711,7 @@ static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct) octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_MBOX_RINT_ENA_W1S(0), -1ULL); octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL); octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL); @@ -708,6 +738,7 @@ static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct) octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_MBOX_RINT_ENA_W1C(0), -1ULL); octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL); octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL); @@ -843,6 +874,7 @@ void octep_device_setup_cnxk_pf(struct octep_device *oct) oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cnxk_pf; oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cnxk_pf; + oct->hw_ops.mbox_intr_handler = octep_pfvf_mbox_intr_handler_cnxk_pf; oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cnxk_pf; oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cnxk_pf; oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cnxk_pf; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h index 7f8135788efc..6da32d40f926 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h @@ -16,10 +16,12 @@ * |reserved (4 bytes) | * |-------------------------------------------| * |host version (8 bytes) | + * | low 32 bits | * |host status (8 bytes) | * |host reserved (104 bytes) | * |-------------------------------------------| - * |fw version (8 bytes) | + * |fw version's (8 bytes) | + * | min=high 32 bits, max=low 32 bits | * |fw status (8 bytes) | * |fw reserved (104 bytes) | * |===========================================| diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c index 9dff2166dbb7..01b7be154c38 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c @@ -13,6 +13,7 @@ #include "octep_config.h" #include "octep_main.h" #include "octep_ctrl_net.h" +#include "octep_pfvf_mbox.h" /* Control plane version */ #define OCTEP_CP_VERSION_CURRENT OCTEP_CP_VERSION(1, 0, 0) @@ -329,6 +330,11 @@ static int process_mbox_notify(struct octep_device *oct, octep_ctrl_net_f2h_cmd_versions[cmd] < OCTEP_CP_VERSION_CURRENT) return -EOPNOTSUPP; + if (msg->hdr.s.is_vf) { + octep_pfvf_notify(oct, msg); + return 0; + } + switch (cmd) { case OCTEP_CTRL_NET_F2H_CMD_LINK_STATUS: if (netif_running(netdev)) { diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index e56188f1d09b..7c9faa714a10 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -16,6 +16,7 @@ #include "octep_config.h" #include "octep_main.h" #include "octep_ctrl_net.h" +#include "octep_pfvf_mbox.h" #define OCTEP_INTR_POLL_TIME_MSECS 100 struct workqueue_struct *octep_wq; @@ -160,6 +161,21 @@ static void octep_disable_msix(struct octep_device *oct) } /** + * octep_mbox_intr_handler() - common handler for pfvf mbox interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for pfvf mbox interrupts. + */ +static irqreturn_t octep_mbox_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.mbox_intr_handler(oct); +} + +/** * octep_oei_intr_handler() - common handler for output endpoint interrupts. * * @irq: Interrupt number. @@ -362,8 +378,12 @@ static int octep_request_irqs(struct octep_device *oct) snprintf(irq_name, OCTEP_MSIX_NAME_SIZE, "%s-%s", netdev->name, non_ioq_msix_names[i]); - if (!strncmp(non_ioq_msix_names[i], "epf_oei_rint", - strlen("epf_oei_rint"))) { + if (!strncmp(non_ioq_msix_names[i], "epf_mbox_rint", strlen("epf_mbox_rint"))) { + ret = request_irq(msix_entry->vector, + octep_mbox_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_oei_rint", + strlen("epf_oei_rint"))) { ret = request_irq(msix_entry->vector, octep_oei_intr_handler, 0, irq_name, oct); @@ -1322,6 +1342,7 @@ static void octep_device_cleanup(struct octep_device *oct) oct->mbox[i] = NULL; } + octep_delete_pfvf_mbox(oct); octep_ctrl_net_uninit(oct); cancel_delayed_work_sync(&oct->hb_task); @@ -1419,6 +1440,12 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_octep_config; } + err = octep_setup_pfvf_mbox(octep_dev); + if (err) { + dev_err(&pdev->dev, "PF-VF mailbox setup failed\n"); + goto register_dev_err; + } + err = octep_ctrl_net_get_info(octep_dev, OCTEP_CTRL_NET_INVALID_VFID, &octep_dev->conf->fw_info); if (err) { @@ -1487,6 +1514,21 @@ err_dma_mask: return err; } +static int octep_sriov_disable(struct octep_device *oct) +{ + struct pci_dev *pdev = oct->pdev; + + if (pci_vfs_assigned(oct->pdev)) { + dev_warn(&pdev->dev, "Can't disable SRIOV while VFs are assigned\n"); + return -EPERM; + } + + pci_disable_sriov(pdev); + CFG_GET_ACTIVE_VFS(oct->conf) = 0; + + return 0; +} + /** * octep_remove() - Remove Octeon PCI device from driver control. * @@ -1504,6 +1546,7 @@ static void octep_remove(struct pci_dev *pdev) return; netdev = oct->netdev; + octep_sriov_disable(oct); if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); @@ -1514,11 +1557,47 @@ static void octep_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static int octep_sriov_enable(struct octep_device *oct, int num_vfs) +{ + struct pci_dev *pdev = oct->pdev; + int err; + + CFG_GET_ACTIVE_VFS(oct->conf) = num_vfs; + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "Failed to enable SRIOV err=%d\n", err); + CFG_GET_ACTIVE_VFS(oct->conf) = 0; + return err; + } + + return num_vfs; +} + +static int octep_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct octep_device *oct = pci_get_drvdata(pdev); + int max_nvfs; + + if (num_vfs == 0) + return octep_sriov_disable(oct); + + max_nvfs = CFG_GET_MAX_VFS(oct->conf); + + if (num_vfs > max_nvfs) { + dev_err(&pdev->dev, "Invalid VF count Max supported VFs = %d\n", + max_nvfs); + return -EINVAL; + } + + return octep_sriov_enable(oct, num_vfs); +} + static struct pci_driver octep_driver = { .name = OCTEP_DRV_NAME, .id_table = octep_pci_id_tbl, .probe = octep_probe, .remove = octep_remove, + .sriov_configure = octep_sriov_configure, }; /** diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index 2cb93d425744..fee59e0e0138 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -80,6 +80,7 @@ struct octep_hw_ops { void (*setup_oq_regs)(struct octep_device *oct, int q); void (*setup_mbox_regs)(struct octep_device *oct, int mbox); + irqreturn_t (*mbox_intr_handler)(void *ioq_vector); irqreturn_t (*oei_intr_handler)(void *ioq_vector); irqreturn_t (*ire_intr_handler)(void *ioq_vector); irqreturn_t (*ore_intr_handler)(void *ioq_vector); @@ -118,28 +119,27 @@ struct octep_mbox_data { u64 *data; }; +#define MAX_VF_PF_MBOX_DATA_SIZE 384 +/* wrappers around work structs */ +struct octep_pfvf_mbox_wk { + struct work_struct work; + void *ctxptr; + u64 ctxul; +}; + /* Octeon device mailbox */ struct octep_mbox { - /* A spinlock to protect access to this q_mbox. */ - spinlock_t lock; - - u32 q_no; - u32 state; - - /* SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */ - u8 __iomem *mbox_int_reg; - - /* SLI_PKT_PF_VF_MBOX_SIG(0) for PF, - * SLI_PKT_PF_VF_MBOX_SIG(1) for VF. - */ - u8 __iomem *mbox_write_reg; - - /* SLI_PKT_PF_VF_MBOX_SIG(1) for PF, - * SLI_PKT_PF_VF_MBOX_SIG(0) for VF. - */ - u8 __iomem *mbox_read_reg; - + /* A mutex to protect access to this q_mbox. */ + struct mutex lock; + u32 vf_id; + u32 config_data_index; + u32 message_len; + u8 __iomem *pf_vf_data_reg; + u8 __iomem *vf_pf_data_reg; + struct octep_pfvf_mbox_wk wk; + struct octep_device *oct; struct octep_mbox_data mbox_data; + u8 config_data[MAX_VF_PF_MBOX_DATA_SIZE]; }; /* Tx/Rx queue vector per interrupt. */ @@ -217,6 +217,12 @@ struct octep_iface_link_info { u8 oper_up; }; +/* The Octeon VF device specific info data structure.*/ +struct octep_pfvf_info { + u8 mac_addr[ETH_ALEN]; + u32 mbox_version; +}; + /* The Octeon device specific private data structure. * Each Octeon device has this structure to represent all its components. */ @@ -282,6 +288,8 @@ struct octep_device { /* Mailbox to talk to VFs */ struct octep_mbox *mbox[OCTEP_MAX_VF]; + /* VFs info */ + struct octep_pfvf_info vf_info[OCTEP_MAX_VF]; /* Work entry to handle Tx timeout */ struct work_struct tx_timeout_task; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c new file mode 100644 index 000000000000..2e2c3be8a0b4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/mutex.h> +#include <linux/jiffies.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/io.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_pfvf_mbox.h" +#include "octep_ctrl_net.h" + +/* When a new command is implemented, the below table should be updated + * with new command and it's version info. + */ +static u32 pfvf_cmd_versions[OCTEP_PFVF_MBOX_CMD_MAX] = { + [0 ... OCTEP_PFVF_MBOX_CMD_DEV_REMOVE] = OCTEP_PFVF_MBOX_VERSION_V1, + [OCTEP_PFVF_MBOX_CMD_GET_FW_INFO ... OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS] = + OCTEP_PFVF_MBOX_VERSION_V2 +}; + +static void octep_pfvf_validate_version(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + u32 vf_version = (u32)cmd.s_version.version; + + dev_dbg(&oct->pdev->dev, "VF id:%d VF version:%d PF version:%d\n", + vf_id, vf_version, OCTEP_PFVF_MBOX_VERSION_CURRENT); + if (vf_version < OCTEP_PFVF_MBOX_VERSION_CURRENT) + rsp->s_version.version = vf_version; + else + rsp->s_version.version = OCTEP_PFVF_MBOX_VERSION_CURRENT; + + oct->vf_info[vf_id].mbox_version = rsp->s_version.version; + dev_dbg(&oct->pdev->dev, "VF id:%d negotiated VF version:%d\n", + vf_id, oct->vf_info[vf_id].mbox_version); + + rsp->s_version.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_get_link_status(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int status; + + status = octep_ctrl_net_get_link_status(oct, vf_id); + if (status < 0) { + rsp->s_link_status.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Get VF link status failed via host control Mbox\n"); + return; + } + rsp->s_link_status.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; + rsp->s_link_status.status = status; +} + +static void octep_pfvf_set_link_status(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int err; + + err = octep_ctrl_net_set_link_status(oct, vf_id, cmd.s_link_status.status, true); + if (err) { + rsp->s_link_status.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Set VF link status failed via host control Mbox\n"); + return; + } + rsp->s_link_status.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_set_rx_state(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int err; + + err = octep_ctrl_net_set_rx_state(oct, vf_id, cmd.s_link_state.state, true); + if (err) { + rsp->s_link_state.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Set VF Rx link state failed via host control Mbox\n"); + return; + } + rsp->s_link_state.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static int octep_send_notification(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd) +{ + u32 max_rings_per_vf, vf_mbox_queue; + struct octep_mbox *mbox; + + /* check if VF PF Mailbox is compatible for this notification */ + if (pfvf_cmd_versions[cmd.s.opcode] > oct->vf_info[vf_id].mbox_version) { + dev_dbg(&oct->pdev->dev, "VF Mbox doesn't support Notification:%d on VF ver:%d\n", + cmd.s.opcode, oct->vf_info[vf_id].mbox_version); + return -EOPNOTSUPP; + } + + max_rings_per_vf = CFG_GET_MAX_RPVF(oct->conf); + vf_mbox_queue = vf_id * max_rings_per_vf; + if (!oct->mbox[vf_mbox_queue]) { + dev_err(&oct->pdev->dev, "Notif obtained for bad mbox vf %d\n", vf_id); + return -EINVAL; + } + mbox = oct->mbox[vf_mbox_queue]; + + mutex_lock(&mbox->lock); + writeq(cmd.u64, mbox->pf_vf_data_reg); + mutex_unlock(&mbox->lock); + + return 0; +} + +static void octep_pfvf_set_mtu(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int err; + + err = octep_ctrl_net_set_mtu(oct, vf_id, cmd.s_set_mtu.mtu, true); + if (err) { + rsp->s_set_mtu.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Set VF MTU failed via host control Mbox\n"); + return; + } + rsp->s_set_mtu.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_get_mtu(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int max_rx_pktlen = oct->netdev->max_mtu + (ETH_HLEN + ETH_FCS_LEN); + + rsp->s_set_mtu.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; + rsp->s_get_mtu.mtu = max_rx_pktlen; +} + +static void octep_pfvf_set_mac_addr(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int err; + + err = octep_ctrl_net_set_mac_addr(oct, vf_id, cmd.s_set_mac.mac_addr, true); + if (err) { + rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Set VF MAC address failed via host control Mbox\n"); + return; + } + rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_get_mac_addr(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int err; + + err = octep_ctrl_net_get_mac_addr(oct, vf_id, rsp->s_set_mac.mac_addr); + if (err) { + rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Get VF MAC address failed via host control Mbox\n"); + return; + } + rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_dev_remove(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int err; + + err = octep_ctrl_net_dev_remove(oct, vf_id); + if (err) { + rsp->s.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Failed to acknowledge fw of vf %d removal\n", + vf_id); + return; + } + rsp->s.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_get_fw_info(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + struct octep_fw_info fw_info; + int err; + + err = octep_ctrl_net_get_info(oct, vf_id, &fw_info); + if (err) { + rsp->s_fw_info.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Get VF info failed via host control Mbox\n"); + return; + } + + rsp->s_fw_info.pkind = fw_info.pkind; + rsp->s_fw_info.fsz = fw_info.fsz; + rsp->s_fw_info.rx_ol_flags = fw_info.rx_ol_flags; + rsp->s_fw_info.tx_ol_flags = fw_info.tx_ol_flags; + + rsp->s_fw_info.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +static void octep_pfvf_set_offloads(struct octep_device *oct, u32 vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + struct octep_ctrl_net_offloads offloads = { + .rx_offloads = cmd.s_offloads.rx_ol_flags, + .tx_offloads = cmd.s_offloads.tx_ol_flags + }; + int err; + + err = octep_ctrl_net_set_offloads(oct, vf_id, &offloads, true); + if (err) { + rsp->s_offloads.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + dev_err(&oct->pdev->dev, "Set VF offloads failed via host control Mbox\n"); + return; + } + rsp->s_offloads.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; +} + +int octep_setup_pfvf_mbox(struct octep_device *oct) +{ + int i = 0, num_vfs = 0, rings_per_vf = 0; + int ring = 0; + + num_vfs = oct->conf->sriov_cfg.active_vfs; + rings_per_vf = oct->conf->sriov_cfg.max_rings_per_vf; + + for (i = 0; i < num_vfs; i++) { + ring = rings_per_vf * i; + oct->mbox[ring] = vzalloc(sizeof(*oct->mbox[ring])); + + if (!oct->mbox[ring]) + goto free_mbox; + + memset(oct->mbox[ring], 0, sizeof(struct octep_mbox)); + memset(&oct->vf_info[i], 0, sizeof(struct octep_pfvf_info)); + mutex_init(&oct->mbox[ring]->lock); + INIT_WORK(&oct->mbox[ring]->wk.work, octep_pfvf_mbox_work); + oct->mbox[ring]->wk.ctxptr = oct->mbox[ring]; + oct->mbox[ring]->oct = oct; + oct->mbox[ring]->vf_id = i; + oct->hw_ops.setup_mbox_regs(oct, ring); + } + return 0; + +free_mbox: + while (i) { + i--; + ring = rings_per_vf * i; + cancel_work_sync(&oct->mbox[ring]->wk.work); + mutex_destroy(&oct->mbox[ring]->lock); + vfree(oct->mbox[ring]); + oct->mbox[ring] = NULL; + } + return -ENOMEM; +} + +void octep_delete_pfvf_mbox(struct octep_device *oct) +{ + int rings_per_vf = oct->conf->sriov_cfg.max_rings_per_vf; + int num_vfs = oct->conf->sriov_cfg.active_vfs; + int i = 0, ring = 0, vf_srn = 0; + + for (i = 0; i < num_vfs; i++) { + ring = vf_srn + rings_per_vf * i; + if (!oct->mbox[ring]) + continue; + + if (work_pending(&oct->mbox[ring]->wk.work)) + cancel_work_sync(&oct->mbox[ring]->wk.work); + + mutex_destroy(&oct->mbox[ring]->lock); + vfree(oct->mbox[ring]); + oct->mbox[ring] = NULL; + } +} + +static void octep_pfvf_pf_get_data(struct octep_device *oct, + struct octep_mbox *mbox, int vf_id, + union octep_pfvf_mbox_word cmd, + union octep_pfvf_mbox_word *rsp) +{ + int length = 0; + int i = 0; + int err; + struct octep_iface_link_info link_info; + struct octep_iface_rx_stats rx_stats; + struct octep_iface_tx_stats tx_stats; + + rsp->s_data.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; + + if (cmd.s_data.frag != OCTEP_PFVF_MBOX_MORE_FRAG_FLAG) { + mbox->config_data_index = 0; + memset(mbox->config_data, 0, MAX_VF_PF_MBOX_DATA_SIZE); + /* Based on the OPCODE CMD the PF driver + * specific API should be called to fetch + * the requested data + */ + switch (cmd.s.opcode) { + case OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO: + memset(&link_info, 0, sizeof(link_info)); + err = octep_ctrl_net_get_link_info(oct, vf_id, &link_info); + if (!err) { + mbox->message_len = sizeof(link_info); + *((int32_t *)rsp->s_data.data) = mbox->message_len; + memcpy(mbox->config_data, (u8 *)&link_info, sizeof(link_info)); + } else { + rsp->s_data.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + return; + } + break; + case OCTEP_PFVF_MBOX_CMD_GET_STATS: + memset(&rx_stats, 0, sizeof(rx_stats)); + memset(&tx_stats, 0, sizeof(tx_stats)); + err = octep_ctrl_net_get_if_stats(oct, vf_id, &rx_stats, &tx_stats); + if (!err) { + mbox->message_len = sizeof(rx_stats) + sizeof(tx_stats); + *((int32_t *)rsp->s_data.data) = mbox->message_len; + memcpy(mbox->config_data, (u8 *)&rx_stats, sizeof(rx_stats)); + memcpy(mbox->config_data + sizeof(rx_stats), (u8 *)&tx_stats, + sizeof(tx_stats)); + + } else { + rsp->s_data.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + return; + } + break; + } + *((int32_t *)rsp->s_data.data) = mbox->message_len; + return; + } + + if (mbox->message_len > OCTEP_PFVF_MBOX_MAX_DATA_SIZE) + length = OCTEP_PFVF_MBOX_MAX_DATA_SIZE; + else + length = mbox->message_len; + + mbox->message_len -= length; + + for (i = 0; i < length; i++) { + rsp->s_data.data[i] = + mbox->config_data[mbox->config_data_index]; + mbox->config_data_index++; + } +} + +void octep_pfvf_notify(struct octep_device *oct, struct octep_ctrl_mbox_msg *msg) +{ + union octep_pfvf_mbox_word notif = { 0 }; + struct octep_ctrl_net_f2h_req *req; + + req = (struct octep_ctrl_net_f2h_req *)msg->sg_list[0].msg; + switch (req->hdr.s.cmd) { + case OCTEP_CTRL_NET_F2H_CMD_LINK_STATUS: + notif.s_link_status.opcode = OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS; + notif.s_link_status.status = req->link.state; + break; + default: + pr_info("Unknown mbox notif for vf: %u\n", + req->hdr.s.cmd); + return; + } + + notif.s.type = OCTEP_PFVF_MBOX_TYPE_CMD; + octep_send_notification(oct, msg->hdr.s.vf_idx, notif); +} + +void octep_pfvf_mbox_work(struct work_struct *work) +{ + struct octep_pfvf_mbox_wk *wk = container_of(work, struct octep_pfvf_mbox_wk, work); + union octep_pfvf_mbox_word cmd = { 0 }; + union octep_pfvf_mbox_word rsp = { 0 }; + struct octep_mbox *mbox = NULL; + struct octep_device *oct = NULL; + int vf_id; + + mbox = (struct octep_mbox *)wk->ctxptr; + oct = (struct octep_device *)mbox->oct; + vf_id = mbox->vf_id; + + mutex_lock(&mbox->lock); + cmd.u64 = readq(mbox->vf_pf_data_reg); + rsp.u64 = 0; + + switch (cmd.s.opcode) { + case OCTEP_PFVF_MBOX_CMD_VERSION: + octep_pfvf_validate_version(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_GET_LINK_STATUS: + octep_pfvf_get_link_status(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_SET_LINK_STATUS: + octep_pfvf_set_link_status(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_SET_RX_STATE: + octep_pfvf_set_rx_state(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_SET_MTU: + octep_pfvf_set_mtu(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_SET_MAC_ADDR: + octep_pfvf_set_mac_addr(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_GET_MAC_ADDR: + octep_pfvf_get_mac_addr(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO: + case OCTEP_PFVF_MBOX_CMD_GET_STATS: + octep_pfvf_pf_get_data(oct, mbox, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_GET_MTU: + octep_pfvf_get_mtu(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_DEV_REMOVE: + octep_pfvf_dev_remove(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_GET_FW_INFO: + octep_pfvf_get_fw_info(oct, vf_id, cmd, &rsp); + break; + case OCTEP_PFVF_MBOX_CMD_SET_OFFLOADS: + octep_pfvf_set_offloads(oct, vf_id, cmd, &rsp); + break; + default: + dev_err(&oct->pdev->dev, "PF-VF mailbox: invalid opcode %d\n", cmd.s.opcode); + rsp.s.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + break; + } + writeq(rsp.u64, mbox->vf_pf_data_reg); + mutex_unlock(&mbox->lock); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h new file mode 100644 index 000000000000..0dc6eead292a --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_PFVF_MBOX_H_ +#define _OCTEP_PFVF_MBOX_H_ + +/* VF flags */ +#define OCTEON_PFVF_FLAG_MAC_SET_BY_PF BIT_ULL(0) /* PF has set VF MAC address */ +#define OCTEON_SDP_16K_HW_FRS 16380UL +#define OCTEON_SDP_64K_HW_FRS 65531UL + +/* When a new command is implemented,PF Mbox version should be bumped. + */ +enum octep_pfvf_mbox_version { + OCTEP_PFVF_MBOX_VERSION_V0, + OCTEP_PFVF_MBOX_VERSION_V1, + OCTEP_PFVF_MBOX_VERSION_V2, +}; + +#define OCTEP_PFVF_MBOX_VERSION_CURRENT OCTEP_PFVF_MBOX_VERSION_V2 + +enum octep_pfvf_mbox_opcode { + OCTEP_PFVF_MBOX_CMD_VERSION, + OCTEP_PFVF_MBOX_CMD_SET_MTU, + OCTEP_PFVF_MBOX_CMD_SET_MAC_ADDR, + OCTEP_PFVF_MBOX_CMD_GET_MAC_ADDR, + OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO, + OCTEP_PFVF_MBOX_CMD_GET_STATS, + OCTEP_PFVF_MBOX_CMD_SET_RX_STATE, + OCTEP_PFVF_MBOX_CMD_SET_LINK_STATUS, + OCTEP_PFVF_MBOX_CMD_GET_LINK_STATUS, + OCTEP_PFVF_MBOX_CMD_GET_MTU, + OCTEP_PFVF_MBOX_CMD_DEV_REMOVE, + OCTEP_PFVF_MBOX_CMD_GET_FW_INFO, + OCTEP_PFVF_MBOX_CMD_SET_OFFLOADS, + OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS, + OCTEP_PFVF_MBOX_CMD_MAX, +}; + +enum octep_pfvf_mbox_word_type { + OCTEP_PFVF_MBOX_TYPE_CMD, + OCTEP_PFVF_MBOX_TYPE_RSP_ACK, + OCTEP_PFVF_MBOX_TYPE_RSP_NACK, +}; + +enum octep_pfvf_mbox_cmd_status { + OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP = 1, + OCTEP_PFVF_MBOX_CMD_STATUS_TIMEDOUT = 2, + OCTEP_PFVF_MBOX_CMD_STATUS_NACK = 3, + OCTEP_PFVF_MBOX_CMD_STATUS_BUSY = 4 +}; + +enum octep_pfvf_mbox_state { + OCTEP_PFVF_MBOX_STATE_IDLE = 0, + OCTEP_PFVF_MBOX_STATE_BUSY = 1, +}; + +enum octep_pfvf_link_status { + OCTEP_PFVF_LINK_STATUS_DOWN, + OCTEP_PFVF_LINK_STATUS_UP, +}; + +enum octep_pfvf_link_speed { + OCTEP_PFVF_LINK_SPEED_NONE, + OCTEP_PFVF_LINK_SPEED_1000, + OCTEP_PFVF_LINK_SPEED_10000, + OCTEP_PFVF_LINK_SPEED_25000, + OCTEP_PFVF_LINK_SPEED_40000, + OCTEP_PFVF_LINK_SPEED_50000, + OCTEP_PFVF_LINK_SPEED_100000, + OCTEP_PFVF_LINK_SPEED_LAST, +}; + +enum octep_pfvf_link_duplex { + OCTEP_PFVF_LINK_HALF_DUPLEX, + OCTEP_PFVF_LINK_FULL_DUPLEX, +}; + +enum octep_pfvf_link_autoneg { + OCTEP_PFVF_LINK_AUTONEG, + OCTEP_PFVF_LINK_FIXED, +}; + +#define OCTEP_PFVF_MBOX_TIMEOUT_MS 500 +#define OCTEP_PFVF_MBOX_MAX_RETRIES 2 +#define OCTEP_PFVF_MBOX_MAX_DATA_SIZE 6 +#define OCTEP_PFVF_MBOX_MORE_FRAG_FLAG 1 +#define OCTEP_PFVF_MBOX_WRITE_WAIT_TIME msecs_to_jiffies(1) + +union octep_pfvf_mbox_word { + u64 u64; + struct { + u64 opcode:8; + u64 type:2; + u64 rsvd:6; + u64 data:48; + } s; + struct { + u64 opcode:8; + u64 type:2; + u64 frag:1; + u64 rsvd:5; + u8 data[6]; + } s_data; + struct { + u64 opcode:8; + u64 type:2; + u64 rsvd:6; + u64 version:48; + } s_version; + struct { + u64 opcode:8; + u64 type:2; + u64 rsvd:6; + u8 mac_addr[6]; + } s_set_mac; + struct { + u64 opcode:8; + u64 type:2; + u64 rsvd:6; + u64 mtu:48; + } s_set_mtu; + struct { + u64 opcode:8; + u64 type:2; + u64 rsvd:6; + u64 mtu:48; + } s_get_mtu; + struct { + u64 opcode:8; + u64 type:2; + u64 state:1; + u64 rsvd:53; + } s_link_state; + struct { + u64 opcode:8; + u64 type:2; + u64 status:1; + u64 rsvd:53; + } s_link_status; + struct { + u64 opcode:8; + u64 type:2; + u64 pkind:8; + u64 fsz:8; + u64 rx_ol_flags:16; + u64 tx_ol_flags:16; + u64 rsvd:6; + } s_fw_info; + struct { + u64 opcode:8; + u64 type:2; + u64 rsvd:22; + u64 rx_ol_flags:16; + u64 tx_ol_flags:16; + } s_offloads; +} __packed; + +void octep_pfvf_mbox_work(struct work_struct *work); +int octep_setup_pfvf_mbox(struct octep_device *oct); +void octep_delete_pfvf_mbox(struct octep_device *oct); +void octep_pfvf_notify(struct octep_device *oct, struct octep_ctrl_mbox_msg *msg); +#endif diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index 2e20a39d89af..ca473502d7a0 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -208,6 +208,9 @@ #define CN93_SDP_R_MBOX_PF_VF_INT_START 0x10220 #define CN93_SDP_R_MBOX_VF_PF_DATA_START 0x10230 +#define CN93_SDP_MBOX_VF_PF_DATA_START 0x24000 +#define CN93_SDP_MBOX_PF_VF_DATA_START 0x22000 + #define CN93_SDP_R_MBOX_PF_VF_DATA(ring) \ (CN93_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CN93_RING_OFFSET)) @@ -217,6 +220,12 @@ #define CN93_SDP_R_MBOX_VF_PF_DATA(ring) \ (CN93_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CN93_RING_OFFSET)) +#define CN93_SDP_MBOX_VF_PF_DATA(ring) \ + (CN93_SDP_MBOX_VF_PF_DATA_START + ((ring) * CN93_EPVF_RING_OFFSET)) + +#define CN93_SDP_MBOX_PF_VF_DATA(ring) \ + (CN93_SDP_MBOX_PF_VF_DATA_START + ((ring) * CN93_EPVF_RING_OFFSET)) + /* ##################### Interrupt Registers ########################## */ #define CN93_SDP_R_ERR_TYPE_START 0x10400 diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h index ea677f760ef0..e637d7c8224d 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h @@ -183,6 +183,7 @@ #define CNXK_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0 #define CNXK_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0 +#define CNXK_SDP_R_MBOX_ISM_START 0x10500 #define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 #define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 @@ -204,6 +205,9 @@ #define CNXK_SDP_R_OUT_INT_MDRT_DBG(ring) \ (CNXK_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) +#define CNXK_SDP_R_MBOX_ISM(ring) \ + (CNXK_SDP_R_MBOX_ISM_START + ((ring) * CNXK_RING_OFFSET)) + #define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) @@ -222,6 +226,9 @@ #define CNXK_SDP_R_MBOX_PF_VF_INT_START 0x10220 #define CNXK_SDP_R_MBOX_VF_PF_DATA_START 0x10230 +#define CNXK_SDP_MBOX_VF_PF_DATA_START 0x24000 +#define CNXK_SDP_MBOX_PF_VF_DATA_START 0x22000 + #define CNXK_SDP_R_MBOX_PF_VF_DATA(ring) \ (CNXK_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_RING_OFFSET)) @@ -231,6 +238,12 @@ #define CNXK_SDP_R_MBOX_VF_PF_DATA(ring) \ (CNXK_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_RING_OFFSET)) +#define CNXK_SDP_MBOX_VF_PF_DATA(ring) \ + (CNXK_SDP_MBOX_VF_PF_DATA_START + ((ring) * CNXK_EPVF_RING_OFFSET)) + +#define CNXK_SDP_MBOX_PF_VF_DATA(ring) \ + (CNXK_SDP_MBOX_PF_VF_DATA_START + ((ring) * CNXK_EPVF_RING_OFFSET)) + /* ##################### Interrupt Registers ########################## */ #define CNXK_SDP_R_ERR_TYPE_START 0x10400 diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h index 059fa921069f..875a2c34091f 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h @@ -61,6 +61,18 @@ struct octep_tx_buffer { /* Hardware interface Tx statistics */ struct octep_iface_tx_stats { + /* Total frames sent on the interface */ + u64 pkts; + + /* Total octets sent on the interface */ + u64 octs; + + /* Packets sent to a broadcast DMAC */ + u64 bcst; + + /* Packets sent to the multicast DMAC */ + u64 mcst; + /* Packets dropped due to excessive collisions */ u64 xscol; @@ -77,12 +89,6 @@ struct octep_iface_tx_stats { */ u64 scol; - /* Total octets sent on the interface */ - u64 octs; - - /* Total frames sent on the interface */ - u64 pkts; - /* Packets sent with an octet count < 64 */ u64 hist_lt64; @@ -107,12 +113,6 @@ struct octep_iface_tx_stats { /* Packets sent with an octet count of > 1518 */ u64 hist_gt1518; - /* Packets sent to a broadcast DMAC */ - u64 bcst; - - /* Packets sent to the multicast DMAC */ - u64 mcst; - /* Packets sent that experienced a transmit underflow and were * truncated */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index bb5fdb225dab..1e6fbd98423d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -5,7 +5,7 @@ * */ -#include<linux/bitfield.h> +#include <linux/bitfield.h> #include "rvu.h" #include "rvu_reg.h" diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 513c4fe86967..167145bdcb75 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1990,7 +1990,7 @@ free_entry_map: free_bmap_reverse: kfree(mcam->bmap_reverse); free_bmap: - kfree(mcam->counters.bmap); + kfree(mcam->bmap); return -ENOMEM; } @@ -2715,18 +2715,17 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu, rsp->entry = NPC_MCAM_ENTRY_INVALID; rsp->free_count = 0; - /* Check if ref_entry is within range */ - if (req->priority && req->ref_entry >= mcam->bmap_entries) { - dev_err(rvu->dev, "%s: reference entry %d is out of range\n", - __func__, req->ref_entry); - return NPC_MCAM_INVALID_REQ; - } + /* Check if ref_entry is greater that the range + * then set it to max value. + */ + if (req->ref_entry > mcam->bmap_entries) + req->ref_entry = mcam->bmap_entries; /* ref_entry can't be '0' if requested priority is high. * Can't be last entry if requested priority is low. */ if ((!req->ref_entry && req->priority == NPC_MCAM_HIGHER_PRIO) || - ((req->ref_entry == (mcam->bmap_entries - 1)) && + ((req->ref_entry == mcam->bmap_entries) && req->priority == NPC_MCAM_LOWER_PRIO)) return NPC_MCAM_INVALID_REQ; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index bfddbff7bcdf..28fb643d2917 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -399,9 +399,10 @@ static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { struct otx2_nic *pfvf = netdev_priv(dev); + u8 old_pfc_en; int err; - /* Save PFC configuration to interface */ + old_pfc_en = pfvf->pfc_en; pfvf->pfc_en = pfc->pfc_en; if (pfvf->hw.tx_queues >= NIX_PF_PFC_PRIO_MAX) @@ -411,13 +412,17 @@ static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) * supported by the tx queue configuration */ err = otx2_check_pfc_config(pfvf); - if (err) + if (err) { + pfvf->pfc_en = old_pfc_en; return err; + } process_pfc: err = otx2_config_priority_flow_ctrl(pfvf); - if (err) + if (err) { + pfvf->pfc_en = old_pfc_en; return err; + } /* Request Per channel Bpids */ if (pfc->pfc_en) @@ -425,6 +430,12 @@ process_pfc: err = otx2_pfc_txschq_update(pfvf); if (err) { + if (pfc->pfc_en) + otx2_nix_config_bp(pfvf, false); + + otx2_pfc_txschq_stop(pfvf); + pfvf->pfc_en = old_pfc_en; + otx2_config_priority_flow_ctrl(pfvf); dev_err(pfvf->dev, "%s failed to update TX schedulers\n", __func__); return err; } diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index 7ffbd4fca881..d58b07e7e123 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -292,6 +292,9 @@ mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) for (i = 0; i < q->n_desc; i++) { struct mtk_wed_wo_queue_entry *entry = &q->entry[i]; + if (!entry->buf) + continue; + dma_unmap_single(wo->hw->dev, entry->addr, entry->len, DMA_TO_DEVICE); skb_free_frag(entry->buf); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c44870b175f9..f36232dead1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -29,7 +29,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \ - lib/crypto.o + en/mgmt_pf.o lib/crypto.o lib/sd.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f8f0a712c943..a7b1f9686c09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -156,15 +156,18 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int cmd_alloc_index(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent) { unsigned long flags; int ret; spin_lock_irqsave(&cmd->alloc_lock, flags); ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); - if (ret < cmd->vars.max_reg_cmds) + if (ret < cmd->vars.max_reg_cmds) { clear_bit(ret, &cmd->vars.bitmask); + ent->idx = ret; + cmd->ent_arr[ent->idx] = ent; + } spin_unlock_irqrestore(&cmd->alloc_lock, flags); return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM; @@ -979,7 +982,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index(cmd, ent); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -994,15 +997,14 @@ static void cmd_work_handler(struct work_struct *work) up(sem); return; } - ent->idx = alloc_ret; } else { ent->idx = cmd->vars.max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); clear_bit(ent->idx, &cmd->vars.bitmask); + cmd->ent_arr[ent->idx] = ent; spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index cf0477f53dc4..aa1b471e13fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -190,6 +190,9 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev) if (is_mp_supported(dev)) return false; + if (mlx5_core_is_mgmt_pf(dev)) + return false; + return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 76d27d2ee40c..080e7eab52c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -718,7 +718,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) while (block_timestamp > tracer->last_timestamp) { /* Check block override if it's not the first block */ - if (!tracer->last_timestamp) { + if (tracer->last_timestamp) { u64 *ts_event; /* To avoid block override be the HW in case of buffer * wraparound, the time stamp of the previous block diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index d000236ddbac..aa397e3ebe6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -75,6 +75,9 @@ int mlx5_ec_init(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return 0; + if (mlx5_core_is_mgmt_pf(dev)) + return 0; + return mlx5_host_pf_init(dev); } @@ -85,6 +88,9 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return; + if (mlx5_core_is_mgmt_pf(dev)) + return; + mlx5_host_pf_cleanup(dev); err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0bfe1ca8a364..922b63c25154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -60,8 +60,10 @@ #include "lib/clock.h" #include "en/rx_res.h" #include "en/selq.h" +#include "lib/sd.h" extern const struct net_device_ops mlx5e_netdev_ops; +extern const struct net_device_ops mlx5e_mgmt_netdev_ops; struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) @@ -791,6 +793,8 @@ struct mlx5e_channel { struct hwtstamp_config *tstamp; DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; + int vec_ix; + int sd_ix; int cpu; /* Sync between icosq recovery and XSK enable/disable. */ struct mutex icosq_recovery_lock; @@ -914,7 +918,7 @@ struct mlx5e_priv { bool tx_ptp_opened; bool rx_ptp_opened; struct hwtstamp_config tstamp; - u16 q_counter; + u16 q_counter[MLX5_SD_MAX_GROUP_SZ]; u16 drop_rq_q_counter; struct notifier_block events_nb; struct notifier_block blocking_events_nb; @@ -1029,12 +1033,12 @@ struct mlx5e_xsk_param; struct mlx5e_rq_param; int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, - struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_xsk_param *xsk, int node, u16 q_counter, struct mlx5e_rq *rq); #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_close_rq(struct mlx5e_rq *rq); -int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; @@ -1122,9 +1126,10 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) } extern const struct ethtool_ops mlx5e_ethtool_ops; +extern const struct mlx5e_profile mlx5e_mgmt_pf_nic_profile; int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); -int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, bool enable_mc_lb); @@ -1227,6 +1232,8 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features); int mlx5e_set_features(struct net_device *netdev, netdev_features_t features); +void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv); + #ifdef CONFIG_MLX5_ESWITCH int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c index 48581ea3adcb..874a1016623c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -23,20 +23,26 @@ bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix) return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); } -void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn, + u32 *vhca_id) { struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); *rqn = c->rq.rqn; + if (vhca_id) + *vhca_id = MLX5_CAP_GEN(c->mdev, vhca_id); } -void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn, + u32 *vhca_id) { struct mlx5e_channel *c = mlx5e_channels_get(chs, ix); WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)); *rqn = c->xskrq.rqn; + if (vhca_id) + *vhca_id = MLX5_CAP_GEN(c->mdev, vhca_id); } bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h index 637ca90daaa8..6715aa9383b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -10,8 +10,10 @@ struct mlx5e_channels; unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix); -void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); -void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn, + u32 *vhca_id); +void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn, + u32 *vhca_id); bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); #endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index be83ad9db82a..e1283531e0b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -154,6 +154,7 @@ static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type ty in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mgmt_pf.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mgmt_pf.c new file mode 100644 index 000000000000..77b5805895b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mgmt_pf.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/kernel.h> +#include "en/params.h" +#include "en/health.h" +#include "lib/eq.h" +#include "en/dcbnl.h" +#include "en_accel/ipsec.h" +#include "en_accel/en_accel.h" +#include "en/trap.h" +#include "en/monitor_stats.h" +#include "en/hv_vhca_stats.h" +#include "en_rep.h" +#include "en.h" + +static int mgmt_pf_async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; + + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static void mlx5e_mgmt_pf_enable_async_events(struct mlx5e_priv *priv) +{ + priv->events_nb.notifier_call = mgmt_pf_async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); +} + +static void mlx5e_disable_mgmt_pf_async_events(struct mlx5e_priv *priv) +{ + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); +} + +static void mlx5e_modify_mgmt_pf_admin_state(struct mlx5_core_dev *mdev, + enum mlx5_port_status state) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int vport_admin_state; + + mlx5_set_port_admin_status(mdev, state); + + if (state == MLX5_PORT_UP) + vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO; + else + vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN; + + mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state); +} + +static void mlx5e_build_mgmt_pf_nic_params(struct mlx5e_priv *priv, u16 mtu) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5_core_dev *mdev = priv->mdev; + u8 rx_cq_period_mode; + + params->sw_mtu = mtu; + params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->num_channels = 1; + + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev)); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); + + /* RQ */ + mlx5e_build_rq_params(mdev, params); + + /* CQ moderation params */ + rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); + mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + + /* TX inline */ + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); +} + +static int mlx5e_mgmt_pf_init(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_flow_steering *fs; + int err; + + mlx5e_build_mgmt_pf_nic_params(priv, netdev->mtu); + + mlx5e_timestamp_init(priv); + + fs = mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); + if (!fs) { + err = -ENOMEM; + mlx5_core_err(mdev, "FS initialization failed, %d\n", err); + return err; + } + priv->fs = fs; + + mlx5e_health_create_reporters(priv); + + return 0; +} + +static void mlx5e_mgmt_pf_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_health_destroy_reporters(priv); + mlx5e_fs_cleanup(priv->fs); + priv->fs = NULL; +} + +static int mlx5e_mgmt_pf_init_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + priv->rx_res = mlx5e_rx_res_create(mdev, 0, priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, + priv->channels.params.num_channels); + if (!priv->rx_res) + return -ENOMEM; + + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + + err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile, + priv->netdev); + if (err) { + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_rx_res; + } + + return 0; + +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); + priv->rx_res = NULL; + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); + return err; +} + +static void mlx5e_mgmt_pf_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE), + priv->profile); + mlx5e_rx_res_destroy(priv->rx_res); + priv->rx_res = NULL; + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); +} + +static int mlx5e_mgmt_pf_init_tx(struct mlx5e_priv *priv) +{ + return 0; +} + +static void mlx5e_mgmt_pf_cleanup_tx(struct mlx5e_priv *priv) +{ +} + +static void mlx5e_mgmt_pf_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5e_fs_init_l2_addr(priv->fs, netdev); + + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5e_modify_mgmt_pf_admin_state(mdev, MLX5_PORT_DOWN); + + mlx5e_set_netdev_mtu_boundaries(priv); + mlx5e_set_dev_port_mtu(priv); + + mlx5e_mgmt_pf_enable_async_events(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_init(priv); + + mlx5e_hv_vhca_stats_create(priv); + if (netdev->reg_state != NETREG_REGISTERED) + return; + mlx5e_dcbnl_init_app(priv); + + mlx5e_nic_set_rx_mode(priv); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); + netif_device_attach(netdev); + rtnl_unlock(); +} + +static void mlx5e_mgmt_pf_disable(struct mlx5e_priv *priv) +{ + if (priv->netdev->reg_state == NETREG_REGISTERED) + mlx5e_dcbnl_delete_app(priv); + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + + mlx5e_nic_set_rx_mode(priv); + + mlx5e_hv_vhca_stats_destroy(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_cleanup(priv); + + mlx5e_disable_mgmt_pf_async_events(priv); + mlx5e_ipsec_cleanup(priv); +} + +static int mlx5e_mgmt_pf_update_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false, false); +} + +static int mlx5e_mgmt_pf_max_nch_limit(struct mlx5_core_dev *mdev) +{ + return 1; +} + +const struct mlx5e_profile mlx5e_mgmt_pf_nic_profile = { + .init = mlx5e_mgmt_pf_init, + .cleanup = mlx5e_mgmt_pf_cleanup, + .init_rx = mlx5e_mgmt_pf_init_rx, + .cleanup_rx = mlx5e_mgmt_pf_cleanup_rx, + .init_tx = mlx5e_mgmt_pf_init_tx, + .cleanup_tx = mlx5e_mgmt_pf_cleanup_tx, + .enable = mlx5e_mgmt_pf_enable, + .disable = mlx5e_mgmt_pf_disable, + .update_rx = mlx5e_mgmt_pf_update_rx, + .update_stats = mlx5e_stats_update_ndo_stats, + .update_carrier = mlx5e_update_carrier, + .rx_handlers = &mlx5e_rx_handlers_nic, + .max_tc = 1, + .max_nch_limit = mlx5e_mgmt_pf_max_nch_limit, + .stats_grps = mlx5e_nic_stats_grps, + .stats_grps_num = mlx5e_nic_stats_grps_num +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c index 40c8df111754..e2d8d2754be0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -20,10 +20,8 @@ #define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 #define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 -int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +static int mlx5e_monitor_counter_cap(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters)) return false; if (MLX5_CAP_PCAM_REG(mdev, ppcnt) && @@ -36,24 +34,38 @@ int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) return true; } -static void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv) +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *pos; + int i; + + mlx5_sd_for_each_dev(i, priv->mdev, pos) + if (!mlx5e_monitor_counter_cap(pos)) + return false; + return true; +} + +static void mlx5e_monitor_counter_arm(struct mlx5_core_dev *mdev) { u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {}; MLX5_SET(arm_monitor_counter_in, in, opcode, MLX5_CMD_OP_ARM_MONITOR_COUNTER); - mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in); + mlx5_cmd_exec_in(mdev, arm_monitor_counter, in); } static void mlx5e_monitor_counters_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, monitor_counters_work); + struct mlx5_core_dev *pos; + int i; mutex_lock(&priv->state_lock); mlx5e_stats_update_ndo_stats(priv); mutex_unlock(&priv->state_lock); - mlx5e_monitor_counter_arm(priv); + mlx5_sd_for_each_dev(i, priv->mdev, pos) + mlx5e_monitor_counter_arm(pos); } static int mlx5e_monitor_event_handler(struct notifier_block *nb, @@ -97,15 +109,13 @@ static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in) } /* check if mlx5e_monitor_counter_supported before calling this function*/ -static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) +static void mlx5e_set_monitor_counter(struct mlx5_core_dev *mdev, int q_counter) { - struct mlx5_core_dev *mdev = priv->mdev; int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters); int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters); int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 : MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters); u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; - int q_counter = priv->q_counter; int cnt = 0; if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 && @@ -127,13 +137,17 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) /* check if mlx5e_monitor_counter_supported before calling this function*/ void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) { + struct mlx5_core_dev *pos; + int i; + INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work); MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, MONITOR_COUNTER); - mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb); - - mlx5e_set_monitor_counter(priv); - mlx5e_monitor_counter_arm(priv); + mlx5_sd_for_each_dev(i, priv->mdev, pos) { + mlx5_eq_notifier_register(pos, &priv->monitor_counters_nb); + mlx5e_set_monitor_counter(pos, priv->q_counter[i]); + mlx5e_monitor_counter_arm(pos); + } queue_work(priv->wq, &priv->update_stats_work); } @@ -141,11 +155,15 @@ void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv) { u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + struct mlx5_core_dev *pos; + int i; MLX5_SET(set_monitor_counter_in, in, opcode, MLX5_CMD_OP_SET_MONITOR_COUNTER); - mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in); - mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb); + mlx5_sd_for_each_dev(i, priv->mdev, pos) { + mlx5_cmd_exec_in(pos, set_monitor_counter, in); + mlx5_eq_notifier_unregister(pos, &priv->monitor_counters_nb); + } cancel_work_sync(&priv->monitor_counters_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 284253b79266..fb10bb166fbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -674,7 +674,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e .napi = &c->napi, .ch_stats = c->stats, .node = cpu_to_node(c->cpu), - .ix = c->ix, + .ix = c->vec_ix, }; } @@ -945,7 +945,6 @@ static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *param int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - u16 q_counter, struct mlx5e_rq_param *param) { void *rqc = param->rqc; @@ -1007,7 +1006,6 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); - MLX5_SET(rqc, rqc, counter_set_id, q_counter); MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); @@ -1018,7 +1016,6 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, - u16 q_counter, struct mlx5e_rq_param *param) { void *rqc = param->rqc; @@ -1027,7 +1024,6 @@ void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); - MLX5_SET(rqc, rqc, counter_set_id, q_counter); param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); } @@ -1292,13 +1288,12 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - u16 q_counter, struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz, async_icosq_log_wq_sz; int err; - err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + err = mlx5e_build_rq_param(mdev, params, NULL, &cparam->rq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 6800949dafbc..9a781f18b57f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -130,10 +130,8 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - u16 q_counter, struct mlx5e_rq_param *param); void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, - u16 q_counter, struct mlx5e_rq_param *param); void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param); @@ -149,7 +147,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param); int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - u16 q_counter, struct mlx5e_channel_param *cparam); u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index c206cc0a8483..cafb41895f94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -646,7 +646,6 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, struct net_device *netdev, - u16 q_counter, struct mlx5e_ptp_params *ptp_params) { struct mlx5e_rq_param *rq_params = &ptp_params->rq_param; @@ -655,7 +654,7 @@ static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; mlx5e_init_rq_type_params(mdev, params); params->sw_mtu = netdev->max_mtu; - mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params); + mlx5e_build_rq_param(mdev, params, NULL, rq_params); } static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, @@ -681,7 +680,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, /* RQ */ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { params->vlan_strip_disable = orig->vlan_strip_disable; - mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + mlx5e_ptp_build_rq_param(c->mdev, c->netdev, cparams); } } @@ -714,13 +713,16 @@ static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) { int node = dev_to_node(c->mdev->device); - int err; + int err, sd_ix; + u16 q_counter; err = mlx5e_init_ptp_rq(c, params, &c->rq); if (err) return err; - return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq); + sd_ix = mlx5_sd_ch_ix_get_dev_ix(c->mdev, MLX5E_PTP_CHANNEL_IX); + q_counter = c->priv->q_counter[sd_ix]; + return mlx5e_open_rq(params, rq_param, NULL, node, q_counter, &c->rq); } static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 34adf8c3f81a..e87e26f2c669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -122,8 +122,8 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, memset(¶m_sq, 0, sizeof(param_sq)); memset(¶m_cq, 0, sizeof(param_cq)); - mlx5e_build_sq_param(priv->mdev, params, ¶m_sq); - mlx5e_build_tx_cq_param(priv->mdev, params, ¶m_cq); + mlx5e_build_sq_param(c->mdev, params, ¶m_sq); + mlx5e_build_tx_cq_param(c->mdev, params, ¶m_cq); err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); if (err) goto err_free_sq; @@ -176,7 +176,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id) */ smp_wmb(); - qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid); + qos_dbg(sq->mdev, "Activate QoS SQ qid %u\n", node_qid); mlx5e_activate_txqsq(sq); return 0; @@ -190,7 +190,7 @@ void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) if (!sq) /* Handle the case when the SQ failed to open. */ return; - qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + qos_dbg(sq->mdev, "Deactivate QoS SQ qid %u\n", qid); mlx5e_deactivate_txqsq(sq); priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 4358798d6ce1..25d751eba99b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -294,8 +294,8 @@ static void mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, params = &priv->channels.params; rq_sz = mlx5e_rqwq_get_size(rq); - real_time = mlx5_is_real_time_rq(priv->mdev); - rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); + real_time = mlx5_is_real_time_rq(rq->mdev); + rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(rq->mdev, params, NULL)); mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 6b44ddce14e9..0ab9db319530 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -219,7 +219,6 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq, int tc) { bool stopped = netif_xmit_stopped(sq->txq); - struct mlx5e_priv *priv = sq->priv; u8 state; int err; @@ -227,7 +226,7 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); - err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + err = mlx5_core_query_sq_state(sq->mdev, sq->sqn, &state); if (!err) devlink_fmsg_u8_pair_put(fmsg, "HW state", state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c index 7b8ff7a71003..bcafb4bf9415 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -4,6 +4,33 @@ #include "rqt.h" #include <linux/mlx5/transobj.h> +static bool verify_num_vhca_ids(struct mlx5_core_dev *mdev, u32 *vhca_ids, + unsigned int size) +{ + unsigned int max_num_vhca_id = MLX5_CAP_GEN_2(mdev, max_rqt_vhca_id); + int i; + + /* Verify that all vhca_ids are in range [0, max_num_vhca_ids - 1] */ + for (i = 0; i < size; i++) + if (vhca_ids[i] >= max_num_vhca_id) + return false; + return true; +} + +static bool rqt_verify_vhca_ids(struct mlx5_core_dev *mdev, u32 *vhca_ids, + unsigned int size) +{ + if (!vhca_ids) + return true; + + if (!MLX5_CAP_GEN(mdev, cross_vhca_rqt)) + return false; + if (!verify_num_vhca_ids(mdev, vhca_ids, size)) + return false; + + return true; +} + void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, unsigned int num_channels) { @@ -13,19 +40,38 @@ void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, indir->table[i] = i % num_channels; } +static void fill_rqn_list(void *rqtc, u32 *rqns, u32 *vhca_ids, unsigned int size) +{ + unsigned int i; + + if (vhca_ids) { + MLX5_SET(rqtc, rqtc, rq_vhca_id_format, 1); + for (i = 0; i < size; i++) { + MLX5_SET(rqtc, rqtc, rq_vhca[i].rq_num, rqns[i]); + MLX5_SET(rqtc, rqtc, rq_vhca[i].rq_vhca_id, vhca_ids[i]); + } + } else { + for (i = 0; i < size; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]); + } +} static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, - u16 max_size, u32 *init_rqns, u16 init_size) + u16 max_size, u32 *init_rqns, u32 *init_vhca_ids, u16 init_size) { + int entry_sz; void *rqtc; int inlen; int err; u32 *in; - int i; + + if (!rqt_verify_vhca_ids(mdev, init_vhca_ids, init_size)) + return -EOPNOTSUPP; rqt->mdev = mdev; rqt->size = max_size; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size; + entry_sz = init_vhca_ids ? MLX5_ST_SZ_BYTES(rq_vhca) : MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + entry_sz * init_size; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -33,10 +79,9 @@ static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size); - MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size); - for (i = 0; i < init_size; i++) - MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]); + + fill_rqn_list(rqtc, init_rqns, init_vhca_ids, init_size); err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn); @@ -49,7 +94,7 @@ int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, { u16 max_size = indir_enabled ? indir_table_size : 1; - return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1); + return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, NULL, 1); } static int mlx5e_bits_invert(unsigned long a, int size) @@ -63,7 +108,8 @@ static int mlx5e_bits_invert(unsigned long a, int size) return inv; } -static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns, +static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, u32 *rss_vhca_ids, u32 *vhca_ids, + unsigned int num_rqns, u8 hfunc, struct mlx5e_rss_params_indir *indir) { unsigned int i; @@ -82,30 +128,42 @@ static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns */ return -EINVAL; rss_rqns[i] = rqns[ix]; + if (vhca_ids) + rss_vhca_ids[i] = vhca_ids[ix]; } return 0; } int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, - u32 *rqns, unsigned int num_rqns, + u32 *rqns, u32 *vhca_ids, unsigned int num_rqns, u8 hfunc, struct mlx5e_rss_params_indir *indir) { - u32 *rss_rqns; + u32 *rss_rqns, *rss_vhca_ids = NULL; int err; rss_rqns = kvmalloc_array(indir->actual_table_size, sizeof(*rss_rqns), GFP_KERNEL); if (!rss_rqns) return -ENOMEM; - err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (vhca_ids) { + rss_vhca_ids = kvmalloc_array(indir->actual_table_size, sizeof(*rss_vhca_ids), + GFP_KERNEL); + if (!rss_vhca_ids) { + kvfree(rss_rqns); + return -ENOMEM; + } + } + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, rss_vhca_ids, vhca_ids, num_rqns, hfunc, indir); if (err) goto out; - err = mlx5e_rqt_init(rqt, mdev, indir->max_table_size, rss_rqns, + err = mlx5e_rqt_init(rqt, mdev, indir->max_table_size, rss_rqns, rss_vhca_ids, indir->actual_table_size); out: + kvfree(rss_vhca_ids); kvfree(rss_rqns); return err; } @@ -126,15 +184,20 @@ void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn); } -static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size) +static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids, + unsigned int size) { - unsigned int i; + int entry_sz; void *rqtc; int inlen; u32 *in; int err; - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size; + if (!rqt_verify_vhca_ids(rqt->mdev, vhca_ids, size)) + return -EINVAL; + + entry_sz = vhca_ids ? MLX5_ST_SZ_BYTES(rq_vhca) : MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + entry_sz * size; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -143,8 +206,8 @@ static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int siz MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); MLX5_SET(rqtc, rqtc, rqt_actual_size, size); - for (i = 0; i < size; i++) - MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]); + + fill_rqn_list(rqtc, rqns, vhca_ids, size); err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen); @@ -152,17 +215,21 @@ static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int siz return err; } -int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn) +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id) { - return mlx5e_rqt_redirect(rqt, &rqn, 1); + return mlx5e_rqt_redirect(rqt, &rqn, vhca_id, 1); } -int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids, + unsigned int num_rqns, u8 hfunc, struct mlx5e_rss_params_indir *indir) { - u32 *rss_rqns; + u32 *rss_rqns, *rss_vhca_ids = NULL; int err; + if (!rqt_verify_vhca_ids(rqt->mdev, vhca_ids, num_rqns)) + return -EINVAL; + if (WARN_ON(rqt->size != indir->max_table_size)) return -EINVAL; @@ -170,13 +237,23 @@ int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_ if (!rss_rqns) return -ENOMEM; - err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir); + if (vhca_ids) { + rss_vhca_ids = kvmalloc_array(indir->actual_table_size, sizeof(*rss_vhca_ids), + GFP_KERNEL); + if (!rss_vhca_ids) { + kvfree(rss_rqns); + return -ENOMEM; + } + } + + err = mlx5e_calc_indir_rqns(rss_rqns, rqns, rss_vhca_ids, vhca_ids, num_rqns, hfunc, indir); if (err) goto out; - err = mlx5e_rqt_redirect(rqt, rss_rqns, indir->actual_table_size); + err = mlx5e_rqt_redirect(rqt, rss_rqns, rss_vhca_ids, indir->actual_table_size); out: + kvfree(rss_vhca_ids); kvfree(rss_rqns); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h index 77fba3ebd18d..e0bc30308c77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -20,7 +20,7 @@ void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, unsigned int num_channels); struct mlx5e_rqt { - struct mlx5_core_dev *mdev; + struct mlx5_core_dev *mdev; /* primary */ u32 rqtn; u16 size; }; @@ -28,7 +28,7 @@ struct mlx5e_rqt { int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, bool indir_enabled, u32 init_rqn, u32 indir_table_size); int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, - u32 *rqns, unsigned int num_rqns, + u32 *rqns, u32 *vhca_ids, unsigned int num_rqns, u8 hfunc, struct mlx5e_rss_params_indir *indir); void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt); @@ -38,8 +38,9 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt) } u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels); -int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn); -int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns, +int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id); +int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids, + unsigned int num_rqns, u8 hfunc, struct mlx5e_rss_params_indir *indir); #endif /* __MLX5_EN_RQT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index c1545a2e8d6d..5f742f896600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -74,7 +74,7 @@ struct mlx5e_rss { struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_rqt rqt; - struct mlx5_core_dev *mdev; + struct mlx5_core_dev *mdev; /* primary */ u32 drop_rqn; bool inner_ft_support; bool enabled; @@ -473,21 +473,22 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, return 0; } -static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +static int mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns) { int err; - err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir); + err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, vhca_ids, num_rqns, rss->hash.hfunc, + &rss->indir); if (err) mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n", mlx5e_rqt_get_rqtn(&rss->rqt), err); return err; } -void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns) +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns) { rss->enabled = true; - mlx5e_rss_apply(rss, rqns, num_rqns); + mlx5e_rss_apply(rss, rqns, vhca_ids, num_rqns); } void mlx5e_rss_disable(struct mlx5e_rss *rss) @@ -495,7 +496,7 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss) int err; rss->enabled = false; - err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn); + err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn, NULL); if (err) mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n", mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err); @@ -568,7 +569,7 @@ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, const u8 *key, const u8 *hfunc, - u32 *rqns, unsigned int num_rqns) + u32 *rqns, u32 *vhca_ids, unsigned int num_rqns) { bool changed_indir = false; bool changed_hash = false; @@ -608,7 +609,7 @@ int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, } if (changed_indir && rss->enabled) { - err = mlx5e_rss_apply(rss, rqns, num_rqns); + err = mlx5e_rss_apply(rss, rqns, vhca_ids, num_rqns); if (err) { mlx5e_rss_copy(rss, old_rss); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index d1d0bc350e92..d0df98963c8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -39,7 +39,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, const struct mlx5e_packet_merge_param *init_pkt_merge_param, bool inner, u32 *tirn); -void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns); +void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); void mlx5e_rss_disable(struct mlx5e_rss *rss); int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, @@ -47,7 +47,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, const u8 *key, const u8 *hfunc, - u32 *rqns, unsigned int num_rqns); + u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss); u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt); int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index b23e224e3763..a86eade9a9e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -8,7 +8,7 @@ #define MLX5E_MAX_NUM_RSS 16 struct mlx5e_rx_res { - struct mlx5_core_dev *mdev; + struct mlx5_core_dev *mdev; /* primary */ enum mlx5e_rx_res_features features; unsigned int max_nch; u32 drop_rqn; @@ -19,6 +19,7 @@ struct mlx5e_rx_res { struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; bool rss_active; u32 *rss_rqns; + u32 *rss_vhca_ids; unsigned int rss_nch; struct { @@ -34,6 +35,13 @@ struct mlx5e_rx_res { /* API for rx_res_rss_* */ +static u32 *get_vhca_ids(struct mlx5e_rx_res *res, int offset) +{ + bool multi_vhca = res->features & MLX5E_RX_RES_FEATURE_MULTI_VHCA; + + return multi_vhca ? res->rss_vhca_ids + offset : NULL; +} + void mlx5e_rx_res_rss_update_num_channels(struct mlx5e_rx_res *res, u32 nch) { int i; @@ -85,8 +93,11 @@ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int i return PTR_ERR(rss); mlx5e_rss_set_indir_uniform(rss, init_nch); - if (res->rss_active) - mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); + if (res->rss_active) { + u32 *vhca_ids = get_vhca_ids(res, 0); + + mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch); + } res->rss[i] = rss; *rss_idx = i; @@ -153,10 +164,12 @@ static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) { struct mlx5e_rss *rss = res->rss[i]; + u32 *vhca_ids; if (!rss) continue; - mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch); + vhca_ids = get_vhca_ids(res, 0); + mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch); } } @@ -200,6 +213,7 @@ int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, const u32 *indir, const u8 *key, const u8 *hfunc) { + u32 *vhca_ids = get_vhca_ids(res, 0); struct mlx5e_rss *rss; if (rss_idx >= MLX5E_MAX_NUM_RSS) @@ -209,7 +223,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, if (!rss) return -ENOENT; - return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch); + return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, vhca_ids, + res->rss_nch); } int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, @@ -280,11 +295,13 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx) static void mlx5e_rx_res_free(struct mlx5e_rx_res *res) { + kvfree(res->rss_vhca_ids); kvfree(res->rss_rqns); kvfree(res); } -static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsigned int max_nch) +static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsigned int max_nch, + bool multi_vhca) { struct mlx5e_rx_res *rx_res; @@ -298,6 +315,15 @@ static struct mlx5e_rx_res *mlx5e_rx_res_alloc(struct mlx5_core_dev *mdev, unsig return NULL; } + if (multi_vhca) { + rx_res->rss_vhca_ids = kvcalloc(max_nch, sizeof(*rx_res->rss_vhca_ids), GFP_KERNEL); + if (!rx_res->rss_vhca_ids) { + kvfree(rx_res->rss_rqns); + kvfree(rx_res); + return NULL; + } + } + return rx_res; } @@ -424,10 +450,11 @@ mlx5e_rx_res_create(struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features featu const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch) { + bool multi_vhca = features & MLX5E_RX_RES_FEATURE_MULTI_VHCA; struct mlx5e_rx_res *res; int err; - res = mlx5e_rx_res_alloc(mdev, max_nch); + res = mlx5e_rx_res_alloc(mdev, max_nch, multi_vhca); if (!res) return ERR_PTR(-ENOMEM); @@ -504,10 +531,11 @@ static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, unsigned int ix) { + u32 *vhca_id = get_vhca_ids(res, ix); u32 rqn = res->rss_rqns[ix]; int err; - err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn, vhca_id); if (err) mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), @@ -519,7 +547,7 @@ static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res, { int err; - err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn, NULL); if (err) mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), @@ -534,10 +562,12 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann nch = mlx5e_channels_get_num(chs); for (ix = 0; ix < chs->num; ix++) { + u32 *vhca_id = get_vhca_ids(res, ix); + if (mlx5e_channels_is_xsk(chs, ix)) - mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix], vhca_id); else - mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix], vhca_id); } res->rss_nch = chs->num; @@ -554,7 +584,7 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann if (!mlx5e_channels_get_ptp_rqn(chs, &rqn)) rqn = res->drop_rqn; - err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn, NULL); if (err) mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n", mlx5e_rqt_get_rqtn(&res->ptp.rqt), @@ -573,7 +603,7 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { - err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn, NULL); if (err) mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n", mlx5e_rqt_get_rqtn(&res->ptp.rqt), @@ -584,10 +614,12 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, unsigned int ix, bool xsk) { + u32 *vhca_id = get_vhca_ids(res, ix); + if (xsk) - mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]); + mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix], vhca_id); else - mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix], vhca_id); mlx5e_rx_res_rss_enable(res); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 82aaba8a82b3..7b1a9f0f1874 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -18,6 +18,7 @@ struct mlx5e_rss_params_hash; enum mlx5e_rx_res_features { MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), MLX5E_RX_RES_FEATURE_PTP = BIT(1), + MLX5E_RX_RES_FEATURE_MULTI_VHCA = BIT(2), }; /* Setup */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index f63402c48028..1b418095b79a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -197,7 +197,7 @@ parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, } esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; esw_attr->out_count++; - /* attr->dests[].rep is resolved when we handle encap */ + /* attr->dests[].vport is resolved when we handle encap */ return 0; } @@ -270,7 +270,8 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; - esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].vport_valid = true; + esw_attr->dests[esw_attr->out_count].vport = rpriv->rep->vport; esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; esw_attr->out_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 668da5c70e63..8dfb57f712b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -300,6 +300,10 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + encap_header = NULL; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -310,8 +314,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv4_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -319,8 +323,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto destroy_neigh_entry; } - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -403,18 +405,23 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, if (err) goto free_encap; + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + encap_header = NULL; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto free_encap; + goto release_neigh; } memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv4_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -422,10 +429,6 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, goto free_encap; } - e->encap_size = ipv4_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -567,6 +570,10 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + encap_header = NULL; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -577,8 +584,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv6_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -586,8 +593,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto destroy_neigh_entry; } - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); @@ -669,18 +674,23 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, if (err) goto free_encap; + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + encap_header = NULL; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto free_encap; + goto release_neigh; } memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = e->reformat_type; - reformat_params.size = ipv6_encap_size; - reformat_params.data = encap_header; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { @@ -688,10 +698,6 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, goto free_encap; } - e->encap_size = ipv6_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index b10e40e1a9c1..f1d1e1542e81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1064,7 +1064,8 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].vport_valid = true; + esw_attr->dests[out_index].vport = rpriv->rep->vport; esw_attr->dests[out_index].mdev = out_priv->mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index ac458a8d10e0..53ca16cb9c41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -63,10 +63,12 @@ static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) struct mlx5e_create_cq_param ccp = {}; struct dim_cq_moder trap_moder = {}; struct mlx5e_rq *rq = &t->rq; + u16 q_counter; int node; int err; node = dev_to_node(mdev->device); + q_counter = priv->q_counter[0]; ccp.netdev = priv->netdev; ccp.wq = priv->wq; @@ -79,7 +81,7 @@ static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) return err; mlx5e_init_trap_rq(t, &t->params, rq); - err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq); + err = mlx5e_open_rq(&t->params, rq_param, NULL, node, q_counter, rq); if (err) goto err_destroy_cq; @@ -116,15 +118,14 @@ static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct ml } static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, - int max_mtu, u16 q_counter, - struct mlx5e_trap *t) + int max_mtu, struct mlx5e_trap *t) { struct mlx5e_params *params = &t->params; params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; mlx5e_init_rq_type_params(mdev, params); params->sw_mtu = max_mtu; - mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param); + mlx5e_build_rq_param(mdev, params, NULL, &t->rq_param); } static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) @@ -138,7 +139,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) if (!t) return ERR_PTR(-ENOMEM); - mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t); + mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, t); t->priv = priv; t->mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 9e695ed122ee..82b5ca1be4f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -540,6 +540,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, dma_addr_t dma_addr = xdptxd->dma_addr; u32 dma_len = xdptxd->len; u16 ds_cnt, inline_hdr_sz; + unsigned int frags_size; u8 num_wqebbs = 1; int num_frags = 0; bool inline_ok; @@ -550,8 +551,9 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE || dma_len >= MLX5E_XDP_MIN_INLINE; + frags_size = xdptxd->has_frags ? xdptxdf->sinfo->xdp_frags_size : 0; - if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) { + if (unlikely(!inline_ok || sq->hw_mtu < dma_len + frags_size)) { stats->err++; return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index ebada0c5af3c..db776e515b6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -6,10 +6,10 @@ #include "setup.h" #include "en/params.h" -static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, +static int mlx5e_xsk_map_pool(struct mlx5_core_dev *mdev, struct xsk_buff_pool *pool) { - struct device *dev = mlx5_core_dma_dev(priv->mdev); + struct device *dev = mlx5_core_dma_dev(mdev); return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); } @@ -89,7 +89,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) return -EINVAL; - err = mlx5e_xsk_map_pool(priv, pool); + err = mlx5e_xsk_map_pool(mlx5_sd_ch_ix_get_dev(priv->mdev, ix), pool); if (unlikely(err)) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 82e6abbc1734..06592b9f0424 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -49,10 +49,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - u16 q_counter, struct mlx5e_channel_param *cparam) { - mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); + mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq); mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq); } @@ -93,6 +92,7 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) { + u16 q_counter = c->priv->q_counter[c->sd_ix]; struct mlx5e_rq *xskrq = &c->xskrq; int err; @@ -100,7 +100,7 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param if (err) return err; - err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq); + err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), q_counter, xskrq); if (err) return err; @@ -125,7 +125,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (!cparam) return -ENOMEM; - mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam); + mlx5e_build_xsk_cparam(priv->mdev, params, xsk, cparam); err = mlx5e_open_cq(c->mdev, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, &c->xskrq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index c1e89dc77db9..41a2543a52cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2142,7 +2142,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) static void mlx5e_ipsec_unblock_tc_offload(struct mlx5_core_dev *mdev) { - mdev->num_block_tc++; + mdev->num_block_tc--; } int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 984fa04bd331..e3e57c849436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -96,7 +96,7 @@ bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) { u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); - if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx)) + if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx) || mlx5_get_sd(mdev)) return false; /* Check the possibility to post the required ICOSQ WQEs. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index f11075e67658..adc6d8ea0960 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -11,6 +11,7 @@ #ifdef CONFIG_MLX5_EN_TLS #include "lib/crypto.h" +#include "lib/mlx5.h" struct mlx5_crypto_dek *mlx5_ktls_create_key(struct mlx5_crypto_dek_pool *dek_pool, struct tls_crypto_info *crypto_info); @@ -61,7 +62,8 @@ void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_ static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev) { - return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx); + return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx) && + !mlx5_get_sd(mdev); } bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 9b597cb24598..65ccb33edafb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -267,7 +267,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, goto err_out; } - pdev = mlx5_core_dma_dev(sq->channel->priv->mdev); + pdev = mlx5_core_dma_dev(sq->channel->mdev); buf->dma_addr = dma_map_single(pdev, &buf->progress, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { @@ -425,14 +425,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; - struct mlx5e_ktls_rx_resync_ctx *resync; u8 tracker_state, auth_state, *ctx; struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; - resync = &priv_rx->resync; - dev = mlx5_core_dma_dev(resync->priv->mdev); + dev = mlx5_core_dma_dev(sq->channel->mdev); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 67f546683e85..6ed3a32b7e22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -95,7 +95,7 @@ static void mlx5e_destroy_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PO { int tc, i; - for (i = 0; i < MLX5_MAX_PORTS; i++) + for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++) for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++) mlx5e_destroy_tis(mdev, tisn[i][tc]); } @@ -110,7 +110,7 @@ static int mlx5e_create_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PORT int tc, i; int err; - for (i = 0; i < MLX5_MAX_PORTS; i++) { + for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++) { for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; void *tisc; @@ -140,7 +140,7 @@ err_close_tises: return err; } -int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises) { struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; int err; @@ -169,11 +169,15 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_destroy_mkey; } - err = mlx5e_create_tises(mdev, res->tisn); - if (err) { - mlx5_core_err(mdev, "alloc tises failed, %d\n", err); - goto err_destroy_bfreg; + if (create_tises) { + err = mlx5e_create_tises(mdev, res->tisn); + if (err) { + mlx5_core_err(mdev, "alloc tises failed, %d\n", err); + goto err_destroy_bfreg; + } + res->tisn_valid = true; } + INIT_LIST_HEAD(&res->td.tirs_list); mutex_init(&res->td.list_lock); @@ -203,7 +207,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_crypto_dek_cleanup(mdev->mlx5e_res.dek_priv); mdev->mlx5e_res.dek_priv = NULL; - mlx5e_destroy_tises(mdev, res->tisn); + if (res->tisn_valid) + mlx5e_destroy_tises(mdev, res->tisn); mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 0fe7ea88d567..cc51ce16df14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -49,7 +49,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - if (count == sizeof(drvinfo->fw_version)) + if (count >= sizeof(drvinfo->fw_version)) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5f1c4ca38ba..40626b6108fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -70,6 +70,7 @@ #include "qos.h" #include "en/trap.h" #include "lib/devcom.h" +#include "lib/sd.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) @@ -1024,7 +1025,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5_wq_destroy(&rq->wq_ctrl); } -int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_counter) { struct mlx5_core_dev *mdev = rq->mdev; u8 ts_format; @@ -1051,6 +1052,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, ts_format, ts_format); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -1274,7 +1276,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, - struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_xsk_param *xsk, int node, u16 q_counter, struct mlx5e_rq *rq) { struct mlx5_core_dev *mdev = rq->mdev; @@ -1287,7 +1289,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, if (err) return err; - err = mlx5e_create_rq(rq, param); + err = mlx5e_create_rq(rq, param, q_counter); if (err) goto err_free_rq; @@ -2333,13 +2335,14 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_rq_param *rq_params) { + u16 q_counter = c->priv->q_counter[c->sd_ix]; int err; err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq); if (err) return err; - return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq); } static int mlx5e_open_queues(struct mlx5e_channel *c, @@ -2526,14 +2529,20 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { - int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix); struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; unsigned int irq; + int vec_ix; + int cpu; int err; - err = mlx5_comp_irqn_get(priv->mdev, ix, &irq); + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); + vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); + cpu = mlx5_comp_vector_get_cpu(mdev, vec_ix); + + err = mlx5_comp_irqn_get(mdev, vec_ix, &irq); if (err) return err; @@ -2546,18 +2555,20 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return -ENOMEM; c->priv = priv; - c->mdev = priv->mdev; + c->mdev = mdev; c->tstamp = &priv->tstamp; c->ix = ix; + c->vec_ix = vec_ix; + c->sd_ix = mlx5_sd_ch_ix_get_dev_ix(mdev, ix); c->cpu = cpu; - c->pdev = mlx5_core_dma_dev(priv->mdev); + c->pdev = mlx5_core_dma_dev(mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); + c->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey); c->num_tc = mlx5e_get_dcb_num_tc(params); c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix]->ch; c->aff_mask = irq_get_effective_affinity_mask(irq); - c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); + c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll); @@ -2647,7 +2658,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (!chs->c || !cparam) goto err_free; - err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); + err = mlx5e_build_channel_param(priv->mdev, &chs->params, cparam); if (err) goto err_free; @@ -2935,15 +2946,18 @@ static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues); static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = priv->mdev; - int num_comp_vectors, ix, irq; - - num_comp_vectors = mlx5_comp_vectors_max(mdev); + int ix; for (ix = 0; ix < params->num_channels; ix++) { + int num_comp_vectors, irq, vec_ix; + struct mlx5_core_dev *mdev; + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); + num_comp_vectors = mlx5_comp_vectors_max(mdev); cpumask_clear(priv->scratchpad.cpumask); + vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); - for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); @@ -3335,7 +3349,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv, struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param); + mlx5e_build_drop_rq_param(mdev, &rq_param); err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); if (err) @@ -3349,7 +3363,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv, if (err) goto err_destroy_cq; - err = mlx5e_create_rq(drop_rq, &rq_param); + err = mlx5e_create_rq(drop_rq, &rq_param, priv->drop_rq_q_counter); if (err) goto err_free_rq; @@ -3785,7 +3799,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; } -static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) +void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) { if (mlx5e_is_uplink_rep(priv)) return; /* no rx mode for uplink rep */ @@ -4990,6 +5004,15 @@ const struct net_device_ops mlx5e_netdev_ops = { #endif }; +const struct net_device_ops mlx5e_mgmt_netdev_ops = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_change_mtu = mlx5e_change_nic_mtu, + .ndo_set_rx_mode = mlx5e_set_rx_mode, +}; + static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -5129,7 +5152,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); - netdev->netdev_ops = &mlx5e_netdev_ops; + if (mlx5_core_is_mgmt_pf(mdev)) + netdev->netdev_ops = &mlx5e_mgmt_netdev_ops; + else + netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops; @@ -5264,13 +5291,17 @@ void mlx5e_create_q_counters(struct mlx5e_priv *priv) u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - int err; + struct mlx5_core_dev *pos; + int err, i; MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); - err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); - if (!err) - priv->q_counter = - MLX5_GET(alloc_q_counter_out, out, counter_set_id); + + mlx5_sd_for_each_dev(i, mdev, pos) { + err = mlx5_cmd_exec_inout(pos, alloc_q_counter, in, out); + if (!err) + priv->q_counter[i] = + MLX5_GET(alloc_q_counter_out, out, counter_set_id); + } err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out); if (!err) @@ -5281,13 +5312,17 @@ void mlx5e_create_q_counters(struct mlx5e_priv *priv) void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; + struct mlx5_core_dev *pos; + int i; MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); - if (priv->q_counter) { - MLX5_SET(dealloc_q_counter_in, in, counter_set_id, - priv->q_counter); - mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in); + mlx5_sd_for_each_dev(i, priv->mdev, pos) { + if (priv->q_counter[i]) { + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, + priv->q_counter[i]); + mlx5_cmd_exec_in(pos, dealloc_q_counter, in); + } } if (priv->drop_rq_q_counter) { @@ -5371,6 +5406,8 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) features = MLX5E_RX_RES_FEATURE_PTP; if (mlx5_tunnel_inner_ft_supported(mdev)) features |= MLX5E_RX_RES_FEATURE_INNER_FT; + if (mlx5_get_sd(priv->mdev)) + features |= MLX5E_RX_RES_FEATURE_MULTI_VHCA; priv->rx_res = mlx5e_rx_res_create(priv->mdev, features, priv->max_nch, priv->drop_rq.rqn, &priv->channels.params.packet_merge, @@ -5980,28 +6017,52 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv) free_netdev(netdev); } -static int mlx5e_resume(struct auxiliary_device *adev) +static int _mlx5e_resume(struct auxiliary_device *adev) { struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); struct mlx5e_priv *priv = mlx5e_dev->priv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = edev->mdev; - int err; + struct mlx5_core_dev *pos, *to; + int err, i; if (netif_device_present(netdev)) return 0; - err = mlx5e_create_mdev_resources(mdev); - if (err) - return err; + mlx5_sd_for_each_dev(i, mdev, pos) { + err = mlx5e_create_mdev_resources(pos, true); + if (err) + goto err_destroy_mdev_res; + } err = mlx5e_attach_netdev(priv); - if (err) { - mlx5e_destroy_mdev_resources(mdev); + if (err) + goto err_destroy_mdev_res; + + return 0; + +err_destroy_mdev_res: + to = pos; + mlx5_sd_for_each_dev_to(i, mdev, to, pos) + mlx5e_destroy_mdev_resources(pos); + return err; +} + +static int mlx5e_resume(struct auxiliary_device *adev) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct auxiliary_device *actual_adev; + int err; + + err = mlx5_sd_init(mdev); + if (err) return err; - } + actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); + if (actual_adev) + return _mlx5e_resume(actual_adev); return 0; } @@ -6011,33 +6072,53 @@ static int _mlx5e_suspend(struct auxiliary_device *adev) struct mlx5e_priv *priv = mlx5e_dev->priv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *pos; + int i; if (!netif_device_present(netdev)) { if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) - mlx5e_destroy_mdev_resources(mdev); + mlx5_sd_for_each_dev(i, mdev, pos) + mlx5e_destroy_mdev_resources(pos); return -ENODEV; } mlx5e_detach_netdev(priv); - mlx5e_destroy_mdev_resources(mdev); + mlx5_sd_for_each_dev(i, mdev, pos) + mlx5e_destroy_mdev_resources(pos); + return 0; } static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) { - return _mlx5e_suspend(adev); + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct auxiliary_device *actual_adev; + int err = 0; + + actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); + if (actual_adev) + err = _mlx5e_suspend(actual_adev); + + mlx5_sd_cleanup(mdev); + return err; } static int _mlx5e_probe(struct auxiliary_device *adev) { struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); - const struct mlx5e_profile *profile = &mlx5e_nic_profile; struct mlx5_core_dev *mdev = edev->mdev; + const struct mlx5e_profile *profile; struct mlx5e_dev *mlx5e_dev; struct net_device *netdev; struct mlx5e_priv *priv; int err; + if (mlx5_core_is_mgmt_pf(mdev)) + profile = &mlx5e_mgmt_pf_nic_profile; + else + profile = &mlx5e_nic_profile; + mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev); if (IS_ERR(mlx5e_dev)) return PTR_ERR(mlx5e_dev); @@ -6071,9 +6152,9 @@ static int _mlx5e_probe(struct auxiliary_device *adev) goto err_destroy_netdev; } - err = mlx5e_resume(adev); + err = _mlx5e_resume(adev); if (err) { - mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err); + mlx5_core_err(mdev, "_mlx5e_resume failed, %d\n", err); goto err_profile_cleanup; } @@ -6104,15 +6185,29 @@ err_devlink_unregister: static int mlx5e_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { - return _mlx5e_probe(adev); + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct auxiliary_device *actual_adev; + int err; + + err = mlx5_sd_init(mdev); + if (err) + return err; + + actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); + if (actual_adev) + return _mlx5e_probe(actual_adev); + return 0; } -static void mlx5e_remove(struct auxiliary_device *adev) +static void _mlx5e_remove(struct auxiliary_device *adev) { + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); struct mlx5e_priv *priv = mlx5e_dev->priv; + struct mlx5_core_dev *mdev = edev->mdev; - mlx5_core_uplink_netdev_set(priv->mdev, NULL); + mlx5_core_uplink_netdev_set(mdev, NULL); mlx5e_dcbnl_delete_app(priv); unregister_netdev(priv->netdev); _mlx5e_suspend(adev); @@ -6122,6 +6217,19 @@ static void mlx5e_remove(struct auxiliary_device *adev) mlx5e_destroy_devlink(mlx5e_dev); } +static void mlx5e_remove(struct auxiliary_device *adev) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct auxiliary_device *actual_adev; + + actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); + if (actual_adev) + _mlx5e_remove(actual_adev); + + mlx5_sd_cleanup(mdev); +} + static const struct auxiliary_device_id mlx5e_id_table[] = { { .name = MLX5_ADEV_NAME ".eth", }, {}, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index ddcc2f6a11c2..05527418fa64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -78,7 +78,7 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - if (count == sizeof(drvinfo->fw_version)) + if (count >= sizeof(drvinfo->fw_version)) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4b96ad657145..f3d0898bdbc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -561,11 +561,23 @@ static const struct counter_desc drop_rq_stats_desc[] = { #define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) #define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) +static bool q_counter_any(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *pos; + int i; + + mlx5_sd_for_each_dev(i, priv->mdev, pos) + if (priv->q_counter[i++]) + return true; + + return false; +} + static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt) { int num_stats = 0; - if (priv->q_counter) + if (q_counter_any(priv)) num_stats += NUM_Q_COUNTERS; if (priv->drop_rq_q_counter) @@ -578,7 +590,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt) { int i; - for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + for (i = 0; i < NUM_Q_COUNTERS && q_counter_any(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); @@ -593,7 +605,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) { int i; - for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + for (i = 0; i < NUM_Q_COUNTERS && q_counter_any(priv); i++) data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i); for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++) @@ -607,18 +619,23 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt) struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; - int ret; + struct mlx5_core_dev *pos; + u32 rx_out_of_buffer = 0; + int ret, i; MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); - if (priv->q_counter) { - MLX5_SET(query_q_counter_in, in, counter_set_id, - priv->q_counter); - ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out); - if (!ret) - qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, - out, out_of_buffer); + mlx5_sd_for_each_dev(i, priv->mdev, pos) { + if (priv->q_counter[i]) { + MLX5_SET(query_q_counter_in, in, counter_set_id, + priv->q_counter[i]); + ret = mlx5_cmd_exec_inout(pos, query_q_counter, in, out); + if (!ret) + rx_out_of_buffer += MLX5_GET(query_q_counter_out, + out, out_of_buffer); + } } + qcnt->rx_out_of_buffer = rx_out_of_buffer; if (priv->drop_rq_q_counter) { MLX5_SET(query_q_counter_in, in, counter_set_id, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 85cdba226eac..b8ceb972df9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -766,7 +766,7 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) return err; mlx5e_rss_params_indir_init_uniform(&indir, hp->num_channels); - err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, + err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, NULL, hp->num_channels, mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, &indir); @@ -1169,7 +1169,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); - params.q_counter = priv->q_counter; + params.q_counter = priv->q_counter[0]; err = devl_param_driverinit_value_get( devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val); if (err) { @@ -3778,7 +3778,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, break; case FLOW_ACTION_ACCEPT: case FLOW_ACTION_PIPE: - if (set_branch_dest_ft(flow->priv, attr)) + err = set_branch_dest_ft(flow->priv, attr); + if (err) goto out_err; break; case FLOW_ACTION_JUMP: @@ -3788,7 +3789,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, goto out_err; } *jump_count = cond->extval; - if (set_branch_dest_ft(flow->priv, attr)) + err = set_branch_dest_ft(flow->priv, attr); + if (err) goto out_err; break; default: @@ -5720,8 +5722,10 @@ int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_a esw = priv->mdev->priv.eswitch; attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); - if (IS_ERR(attr->act_id_restore_rule)) + if (IS_ERR(attr->act_id_restore_rule)) { + err = PTR_ERR(attr->act_id_restore_rule); goto err_rule; + } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 3047d7015c52..3bf419d06d53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1665,7 +1665,7 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 * void *hca_caps; int err; - if (!mlx5_core_is_ecpf(dev)) { + if (!mlx5_core_is_ecpf(dev) || mlx5_core_is_mgmt_pf(dev)) { *max_sfs = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b674b57d05aa..b4eb17141edf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -526,7 +526,8 @@ struct mlx5_esw_flow_attr { u8 total_vlan; struct { u32 flags; - struct mlx5_eswitch_rep *rep; + bool vport_valid; + u16 vport; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bb8bcb448ae9..b0455134c98e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -287,10 +287,9 @@ static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_ for (i = from; i < to; i++) if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5_chains_put_table(chains, 0, 1, 0); - else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, esw_attr->dests[i].mdev)) - mlx5_esw_indir_table_put(esw, esw_attr->dests[i].rep->vport, - false); + mlx5_esw_indir_table_put(esw, esw_attr->dests[i].vport, false); } static bool @@ -358,8 +357,8 @@ esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) * this criteria. */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { - if (esw_attr->dests[i].rep && - mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + if (esw_attr->dests[i].vport_valid && + mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, esw_attr->dests[i].mdev)) { result = true; } else { @@ -388,7 +387,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, - esw_attr->dests[j].rep->vport, false); + esw_attr->dests[j].vport, false); if (IS_ERR(dest[*i].ft)) { err = PTR_ERR(dest[*i].ft); goto err_indir_tbl_get; @@ -432,11 +431,11 @@ static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw, int attr_idx) { if (esw->offloads.ft_ipsec_tx_pol && - esw_attr->dests[attr_idx].rep && - esw_attr->dests[attr_idx].rep->vport == MLX5_VPORT_UPLINK && + esw_attr->dests[attr_idx].vport_valid && + esw_attr->dests[attr_idx].vport == MLX5_VPORT_UPLINK && /* To be aligned with software, encryption is needed only for tunnel device */ (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) && - esw_attr->dests[attr_idx].rep != esw_attr->in_rep && + esw_attr->dests[attr_idx].vport != esw_attr->in_rep->vport && esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev)) return true; @@ -469,7 +468,7 @@ esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_ac int attr_idx, int dest_idx, bool pkt_reformat) { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].vport; if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { dest[dest_idx].vport.vhca_id = MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); @@ -1177,9 +1176,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_flow_handle *flow; struct mlx5_flow_spec *spec; struct mlx5_vport *vport; + int err, pfindex; unsigned long i; void *misc; - int err; if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) return 0; @@ -1255,7 +1254,15 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } } - esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; + + pfindex = mlx5_get_dev_index(peer_dev); + if (pfindex >= MLX5_MAX_PORTS) { + esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n", + pfindex, MLX5_MAX_PORTS); + err = -EINVAL; + goto add_ec_vf_flow_err; + } + esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows; kvfree(spec); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index edd910258314..40bdc677f051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -233,8 +233,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, /* hairpin */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) - if (!esw_attr->dest_int_port && esw_attr->dests[i].rep && - esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + if (!esw_attr->dest_int_port && esw_attr->dests[i].vport_valid && + esw_attr->dests[i].vport == MLX5_VPORT_UPLINK) return true; return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 58845121954c..d77be1b4dd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -783,7 +783,7 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, } /* This should only be called once per mdev */ - err = mlx5e_create_mdev_resources(mdev); + err = mlx5e_create_mdev_resources(mdev, false); if (err) goto destroy_ht; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index ec32b686f586..d58032dd0df7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -10,6 +10,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_ESW_OFFLOADS, MLX5_DEVCOM_MPV, MLX5_DEVCOM_HCA_PORTS, + MLX5_DEVCOM_SD_GROUP, MLX5_DEVCOM_NUM_COMPONENTS, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 2b5826a785c4..37d5f445598c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -54,4 +54,16 @@ static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *md { return mdev->mlx5e_res.uplink_netdev; } + +struct mlx5_sd; + +static inline struct mlx5_sd *mlx5_get_sd(struct mlx5_core_dev *dev) +{ + return dev->sd; +} + +static inline void mlx5_set_sd(struct mlx5_core_dev *dev, struct mlx5_sd *sd) +{ + dev->sd = sd; +} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c new file mode 100644 index 000000000000..f68942277c62 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "lib/sd.h" +#include "mlx5_core.h" +#include "lib/mlx5.h" +#include "fs_cmd.h" +#include <linux/mlx5/vport.h> + +#define sd_info(__dev, format, ...) \ + dev_info((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__) +#define sd_warn(__dev, format, ...) \ + dev_warn((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__) + +struct mlx5_sd { + u32 group_id; + u8 host_buses; + struct mlx5_devcom_comp_dev *devcom; + bool primary; + union { + struct { /* primary */ + struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1]; + struct mlx5_flow_table *tx_ft; + }; + struct { /* secondary */ + struct mlx5_core_dev *primary_dev; + u32 alias_obj_id; + }; + }; +}; + +static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + + if (!sd) + return 1; + + return sd->host_buses; +} + +static struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + + if (!sd) + return dev; + + return sd->primary ? dev : sd->primary_dev; +} + +struct mlx5_core_dev * +mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx) +{ + struct mlx5_sd *sd; + + if (idx == 0) + return primary; + + if (idx >= mlx5_sd_get_host_buses(primary)) + return NULL; + + sd = mlx5_get_sd(primary); + return sd->secondaries[idx - 1]; +} + +int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix) +{ + return ch_ix % mlx5_sd_get_host_buses(dev); +} + +int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix) +{ + return ch_ix / mlx5_sd_get_host_buses(dev); +} + +struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int ch_ix) +{ + int mdev_idx = mlx5_sd_ch_ix_get_dev_ix(primary, ch_ix); + + return mlx5_sd_primary_get_peer(primary, mdev_idx); +} + +static bool ft_create_alias_supported(struct mlx5_core_dev *dev) +{ + u64 obj_allowed = MLX5_CAP_GEN_2_64(dev, allowed_object_for_other_vhca_access); + u32 obj_supp = MLX5_CAP_GEN_2(dev, cross_vhca_object_to_object_supported); + + if (!(obj_supp & + MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE)) + return false; + + if (!(obj_allowed & MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE)) + return false; + + return true; +} + +static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) +{ + /* Feature is currently implemented for PFs only */ + if (!mlx5_core_is_pf(dev)) + return false; + + /* Honor the SW implementation limit */ + if (host_buses > MLX5_SD_MAX_GROUP_SZ) + return false; + + /* Disconnect secondaries from the network */ + if (!MLX5_CAP_GEN(dev, eswitch_manager)) + return false; + if (!MLX5_CAP_GEN(dev, silent_mode)) + return false; + + /* RX steering from primary to secondaries */ + if (!MLX5_CAP_GEN(dev, cross_vhca_rqt)) + return false; + if (host_buses > MLX5_CAP_GEN_2(dev, max_rqt_vhca_id)) + return false; + + /* TX steering from secondaries to primary */ + if (!ft_create_alias_supported(dev)) + return false; + if (!MLX5_CAP_FLOWTABLE_NIC_TX(dev, reset_root_to_default)) + return false; + + return true; +} + +static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, + u8 *host_buses, u8 *sd_group) +{ + u32 out[MLX5_ST_SZ_DW(mpir_reg)]; + int err; + + err = mlx5_query_mpir_reg(dev, out); + if (err) + return err; + + err = mlx5_query_nic_vport_sd_group(dev, sd_group); + if (err) + return err; + + *sdm = MLX5_GET(mpir_reg, out, sdm); + *host_buses = MLX5_GET(mpir_reg, out, host_buses); + + return 0; +} + +static u32 mlx5_sd_group_id(struct mlx5_core_dev *dev, u8 sd_group) +{ + return (u32)((MLX5_CAP_GEN(dev, native_port_num) << 8) | sd_group); +} + +static int sd_init(struct mlx5_core_dev *dev) +{ + u8 host_buses, sd_group; + struct mlx5_sd *sd; + u32 group_id; + bool sdm; + int err; + + err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group); + if (err) + return err; + + if (!sdm) + return 0; + + if (!sd_group) + return 0; + + group_id = mlx5_sd_group_id(dev, sd_group); + + if (!mlx5_sd_is_supported(dev, host_buses)) { + sd_warn(dev, "can't support requested netdev combining for group id 0x%x), skipping\n", + group_id); + return 0; + } + + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) + return -ENOMEM; + + sd->host_buses = host_buses; + sd->group_id = group_id; + + mlx5_set_sd(dev, sd); + + return 0; +} + +static void sd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + + mlx5_set_sd(dev, NULL); + kfree(sd); +} + +static int sd_register(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_comp_dev *devcom, *pos; + struct mlx5_core_dev *peer, *primary; + struct mlx5_sd *sd, *primary_sd; + int err, i; + + sd = mlx5_get_sd(dev); + devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP, + sd->group_id, NULL, dev); + if (!devcom) + return -ENOMEM; + + sd->devcom = devcom; + + if (mlx5_devcom_comp_get_size(devcom) != sd->host_buses) + return 0; + + mlx5_devcom_comp_lock(devcom); + mlx5_devcom_comp_set_ready(devcom, true); + mlx5_devcom_comp_unlock(devcom); + + if (!mlx5_devcom_for_each_peer_begin(devcom)) { + err = -ENODEV; + goto err_devcom_unreg; + } + + primary = dev; + mlx5_devcom_for_each_peer_entry(devcom, peer, pos) + if (peer->pdev->bus->number < primary->pdev->bus->number) + primary = peer; + + primary_sd = mlx5_get_sd(primary); + primary_sd->primary = true; + i = 0; + /* loop the secondaries */ + mlx5_devcom_for_each_peer_entry(primary_sd->devcom, peer, pos) { + struct mlx5_sd *peer_sd = mlx5_get_sd(peer); + + primary_sd->secondaries[i++] = peer; + peer_sd->primary = false; + peer_sd->primary_dev = primary; + } + + mlx5_devcom_for_each_peer_end(devcom); + return 0; + +err_devcom_unreg: + mlx5_devcom_comp_lock(sd->devcom); + mlx5_devcom_comp_set_ready(sd->devcom, false); + mlx5_devcom_comp_unlock(sd->devcom); + mlx5_devcom_unregister_component(sd->devcom); + return err; +} + +static void sd_unregister(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + + mlx5_devcom_comp_lock(sd->devcom); + mlx5_devcom_comp_set_ready(sd->devcom, false); + mlx5_devcom_comp_unlock(sd->devcom); + mlx5_devcom_unregister_component(sd->devcom); +} + +static int sd_cmd_set_primary(struct mlx5_core_dev *primary, u8 *alias_key) +{ + struct mlx5_cmd_allow_other_vhca_access_attr allow_attr = {}; + struct mlx5_sd *sd = mlx5_get_sd(primary); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *nic_ns; + struct mlx5_flow_table *ft; + int err; + + nic_ns = mlx5_get_flow_namespace(primary, MLX5_FLOW_NAMESPACE_EGRESS); + if (!nic_ns) + return -EOPNOTSUPP; + + ft = mlx5_create_flow_table(nic_ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + return err; + } + sd->tx_ft = ft; + memcpy(allow_attr.access_key, alias_key, ACCESS_KEY_LEN); + allow_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS; + allow_attr.obj_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id; + + err = mlx5_cmd_allow_other_vhca_access(primary, &allow_attr); + if (err) { + mlx5_core_err(primary, "Failed to allow other vhca access err=%d\n", + err); + mlx5_destroy_flow_table(ft); + return err; + } + + return 0; +} + +static void sd_cmd_unset_primary(struct mlx5_core_dev *primary) +{ + struct mlx5_sd *sd = mlx5_get_sd(primary); + + mlx5_destroy_flow_table(sd->tx_ft); +} + +static int sd_secondary_create_alias_ft(struct mlx5_core_dev *secondary, + struct mlx5_core_dev *primary, + struct mlx5_flow_table *ft, + u32 *obj_id, u8 *alias_key) +{ + u32 aliased_object_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id; + u16 vhca_id_to_be_accessed = MLX5_CAP_GEN(primary, vhca_id); + struct mlx5_cmd_alias_obj_create_attr alias_attr = {}; + int ret; + + memcpy(alias_attr.access_key, alias_key, ACCESS_KEY_LEN); + alias_attr.obj_id = aliased_object_id; + alias_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS; + alias_attr.vhca_id = vhca_id_to_be_accessed; + ret = mlx5_cmd_alias_obj_create(secondary, &alias_attr, obj_id); + if (ret) { + mlx5_core_err(secondary, "Failed to create alias object err=%d\n", + ret); + return ret; + } + + return 0; +} + +static void sd_secondary_destroy_alias_ft(struct mlx5_core_dev *secondary) +{ + struct mlx5_sd *sd = mlx5_get_sd(secondary); + + mlx5_cmd_alias_obj_destroy(secondary, sd->alias_obj_id, + MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); +} + +static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary, + struct mlx5_core_dev *primary, + u8 *alias_key) +{ + struct mlx5_sd *primary_sd = mlx5_get_sd(primary); + struct mlx5_sd *sd = mlx5_get_sd(secondary); + int err; + + err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1); + if (err) + return err; + + err = sd_secondary_create_alias_ft(secondary, primary, primary_sd->tx_ft, + &sd->alias_obj_id, alias_key); + if (err) + goto err_unset_silent; + + err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id, false); + if (err) + goto err_destroy_alias_ft; + + return 0; + +err_destroy_alias_ft: + sd_secondary_destroy_alias_ft(secondary); +err_unset_silent: + mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0); + return err; +} + +static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary) +{ + mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true); + sd_secondary_destroy_alias_ft(secondary); + mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0); +} + +static void sd_print_group(struct mlx5_core_dev *primary) +{ + struct mlx5_sd *sd = mlx5_get_sd(primary); + struct mlx5_core_dev *pos; + int i; + + sd_info(primary, "group id %#x, primary %s, vhca %u\n", + sd->group_id, pci_name(primary->pdev), + MLX5_CAP_GEN(primary, vhca_id)); + mlx5_sd_for_each_secondary(i, primary, pos) + sd_info(primary, "group id %#x, secondary#%d %s, vhca %u\n", + sd->group_id, i - 1, pci_name(pos->pdev), + MLX5_CAP_GEN(pos, vhca_id)); +} + +int mlx5_sd_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *primary, *pos, *to; + struct mlx5_sd *sd = mlx5_get_sd(dev); + u8 alias_key[ACCESS_KEY_LEN]; + int err, i; + + err = sd_init(dev); + if (err) + return err; + + sd = mlx5_get_sd(dev); + if (!sd) + return 0; + + err = sd_register(dev); + if (err) + goto err_sd_cleanup; + + if (!mlx5_devcom_comp_is_ready(sd->devcom)) + return 0; + + primary = mlx5_sd_get_primary(dev); + + for (i = 0; i < ACCESS_KEY_LEN; i++) + alias_key[i] = get_random_u8(); + + err = sd_cmd_set_primary(primary, alias_key); + if (err) + goto err_sd_unregister; + + mlx5_sd_for_each_secondary(i, primary, pos) { + err = sd_cmd_set_secondary(pos, primary, alias_key); + if (err) + goto err_unset_secondaries; + } + + sd_info(primary, "group id %#x, size %d, combined\n", + sd->group_id, mlx5_devcom_comp_get_size(sd->devcom)); + sd_print_group(primary); + + return 0; + +err_unset_secondaries: + to = pos; + mlx5_sd_for_each_secondary_to(i, primary, to, pos) + sd_cmd_unset_secondary(pos); + sd_cmd_unset_primary(primary); +err_sd_unregister: + sd_unregister(dev); +err_sd_cleanup: + sd_cleanup(dev); + return err; +} + +void mlx5_sd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + struct mlx5_core_dev *primary, *pos; + int i; + + if (!sd) + return; + + if (!mlx5_devcom_comp_is_ready(sd->devcom)) + goto out; + + primary = mlx5_sd_get_primary(dev); + mlx5_sd_for_each_secondary(i, primary, pos) + sd_cmd_unset_secondary(pos); + sd_cmd_unset_primary(primary); + + sd_info(primary, "group id %#x, uncombined\n", sd->group_id); +out: + sd_unregister(dev); + sd_cleanup(dev); +} + +struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, + struct auxiliary_device *adev, + int idx) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + struct mlx5_core_dev *primary; + + if (!sd) + return adev; + + if (!mlx5_devcom_comp_is_ready(sd->devcom)) + return NULL; + + primary = mlx5_sd_get_primary(dev); + if (dev == primary) + return adev; + + return &primary->priv.adev[idx]->adev; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h new file mode 100644 index 000000000000..137efaf9aabc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_SD_H__ +#define __MLX5_LIB_SD_H__ + +#define MLX5_SD_MAX_GROUP_SZ 2 + +struct mlx5_sd; + +struct mlx5_core_dev *mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx); +int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix); +int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix); +struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int ch_ix); +struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, + struct auxiliary_device *adev, + int idx); + +int mlx5_sd_init(struct mlx5_core_dev *dev); +void mlx5_sd_cleanup(struct mlx5_core_dev *dev); + +#define mlx5_sd_for_each_dev_from_to(i, primary, ix_from, to, pos) \ + for (i = ix_from; \ + (pos = mlx5_sd_primary_get_peer(primary, i)) && pos != (to); i++) + +#define mlx5_sd_for_each_dev(i, primary, pos) \ + mlx5_sd_for_each_dev_from_to(i, primary, 0, NULL, pos) + +#define mlx5_sd_for_each_dev_to(i, primary, to, pos) \ + mlx5_sd_for_each_dev_from_to(i, primary, 0, to, pos) + +#define mlx5_sd_for_each_secondary(i, primary, pos) \ + mlx5_sd_for_each_dev_from_to(i, primary, 1, NULL, pos) + +#define mlx5_sd_for_each_secondary_to(i, primary, to, pos) \ + mlx5_sd_for_each_dev_from_to(i, primary, 1, to, pos) + +#endif /* __MLX5_LIB_SD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 5a31fb47ffa5..1005bb6935b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -277,7 +277,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, req_list_size = max_list_size; } - out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) + + out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); out = kvzalloc(out_sz, GFP_KERNEL); @@ -440,6 +440,27 @@ out: } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group) +{ + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; + + *sd_group = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.sd_group); +out: + kvfree(out); + return err; +} + int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) { u32 *out; diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index fecd43754cea..e5ec0a363aff 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -350,6 +350,8 @@ union ks8851_tx_hdr { * @rxd: Space for receiving SPI data, in DMA-able space. * @txd: Space for transmitting SPI data, in DMA-able space. * @msg_enable: The message flags controlling driver output (see ethtool). + * @tx_space: Free space in the hardware TX buffer (cached copy of KS_TXMIR). + * @queued_len: Space required in hardware TX buffer for queued packets in txq. * @fid: Incrementing frame id tag. * @rc_ier: Cached copy of KS_IER. * @rc_ccr: Cached copy of KS_CCR. @@ -399,6 +401,7 @@ struct ks8851_net { struct work_struct rxctrl_work; struct sk_buff_head txq; + unsigned int queued_len; struct eeprom_93cx6 eeprom; struct regulator *vdd_reg; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index cfbc900d4aeb..0bf13b38b8f5 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -362,16 +362,18 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) handled |= IRQ_RXPSI; if (status & IRQ_TXI) { - handled |= IRQ_TXI; + unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR); - /* no lock here, tx queue should have been stopped */ + netif_dbg(ks, intr, ks->netdev, + "%s: txspace %d\n", __func__, tx_space); - /* update our idea of how much tx space is available to the - * system */ - ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); + spin_lock(&ks->statelock); + ks->tx_space = tx_space; + if (netif_queue_stopped(ks->netdev)) + netif_wake_queue(ks->netdev); + spin_unlock(&ks->statelock); - netif_dbg(ks, intr, ks->netdev, - "%s: txspace %d\n", __func__, ks->tx_space); + handled |= IRQ_TXI; } if (status & IRQ_RXI) @@ -414,9 +416,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); - if (status & IRQ_TXI) - netif_wake_queue(ks->netdev); - return IRQ_HANDLED; } @@ -500,6 +499,7 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_ISR, ks->rc_ier); ks8851_wrreg16(ks, KS_IER, ks->rc_ier); + ks->queued_len = 0; netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 70bc7253454f..88e26c120b48 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -287,6 +287,18 @@ static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp, } /** + * calc_txlen - calculate size of message to send packet + * @len: Length of data + * + * Returns the size of the TXFIFO message needed to send + * this packet. + */ +static unsigned int calc_txlen(unsigned int len) +{ + return ALIGN(len + 4, 4); +} + +/** * ks8851_rx_skb_spi - receive skbuff * @ks: The device state * @skb: The skbuff @@ -305,7 +317,9 @@ static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) */ static void ks8851_tx_work(struct work_struct *work) { + unsigned int dequeued_len = 0; struct ks8851_net_spi *kss; + unsigned short tx_space; struct ks8851_net *ks; unsigned long flags; struct sk_buff *txb; @@ -322,6 +336,8 @@ static void ks8851_tx_work(struct work_struct *work) last = skb_queue_empty(&ks->txq); if (txb) { + dequeued_len += calc_txlen(txb->len); + ks8851_wrreg16_spi(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); ks8851_wrfifo_spi(ks, txb, last); @@ -332,6 +348,13 @@ static void ks8851_tx_work(struct work_struct *work) } } + tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); + + spin_lock(&ks->statelock); + ks->queued_len -= dequeued_len; + ks->tx_space = tx_space; + spin_unlock(&ks->statelock); + ks8851_unlock_spi(ks, &flags); } @@ -347,18 +370,6 @@ static void ks8851_flush_tx_work_spi(struct ks8851_net *ks) } /** - * calc_txlen - calculate size of message to send packet - * @len: Length of data - * - * Returns the size of the TXFIFO message needed to send - * this packet. - */ -static unsigned int calc_txlen(unsigned int len) -{ - return ALIGN(len + 4, 4); -} - -/** * ks8851_start_xmit_spi - transmit packet using SPI * @skb: The buffer to transmit * @dev: The device used to transmit the packet. @@ -386,16 +397,17 @@ static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb, spin_lock(&ks->statelock); - if (needed > ks->tx_space) { + if (ks->queued_len + needed > ks->tx_space) { netif_stop_queue(dev); ret = NETDEV_TX_BUSY; } else { - ks->tx_space -= needed; + ks->queued_len += needed; skb_queue_tail(&ks->txq, skb); } spin_unlock(&ks->statelock); - schedule_work(&kss->tx_work); + if (ret == NETDEV_TX_OK) + schedule_work(&kss->tx_work); return ret; } diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 090e6b983243..01eb7445ead9 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -20,6 +20,7 @@ config MICROSOFT_MANA depends on PCI_MSI && X86_64 depends on PCI_HYPERV select AUXILIARY_BUS + select PAGE_POOL help This driver supports Microsoft Azure Network Adapter (MANA). So far, the driver is only supported on X86_64. diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index 5c55197c7327..c018783757fb 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -582,10 +582,10 @@ static void ocelot_port_rmon_stats_cb(struct ocelot *ocelot, int port, void *pri rmon_stats->hist_tx[0] = s[OCELOT_STAT_TX_64]; rmon_stats->hist_tx[1] = s[OCELOT_STAT_TX_65_127]; rmon_stats->hist_tx[2] = s[OCELOT_STAT_TX_128_255]; - rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_128_255]; - rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_256_511]; - rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_512_1023]; - rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_1024_1526]; + rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_256_511]; + rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_512_1023]; + rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_1024_1526]; + rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_1527_MAX]; } static void ocelot_port_pmac_rmon_stats_cb(struct ocelot *ocelot, int port, @@ -610,10 +610,10 @@ static void ocelot_port_pmac_rmon_stats_cb(struct ocelot *ocelot, int port, rmon_stats->hist_tx[0] = s[OCELOT_STAT_TX_PMAC_64]; rmon_stats->hist_tx[1] = s[OCELOT_STAT_TX_PMAC_65_127]; rmon_stats->hist_tx[2] = s[OCELOT_STAT_TX_PMAC_128_255]; - rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_PMAC_128_255]; - rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_PMAC_256_511]; - rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_PMAC_512_1023]; - rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_PMAC_1024_1526]; + rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_PMAC_256_511]; + rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_PMAC_512_1023]; + rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_PMAC_1024_1526]; + rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_PMAC_1527_MAX]; } void ocelot_port_get_rmon_stats(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig index 93d9df55b361..03015b665f4e 100644 --- a/drivers/net/ethernet/realtek/Kconfig +++ b/drivers/net/ethernet/realtek/Kconfig @@ -113,4 +113,11 @@ config R8169 To compile this driver as a module, choose M here: the module will be called r8169. This is recommended. +config R8169_LEDS + def_bool R8169 && LEDS_TRIGGER_NETDEV + depends on !(R8169=y && LEDS_CLASS=m) + help + Optional support for controlling the NIC LED's with the netdev + LED trigger. + endif # NET_VENDOR_REALTEK diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile index 2e1d78b106b0..635491d8826e 100644 --- a/drivers/net/ethernet/realtek/Makefile +++ b/drivers/net/ethernet/realtek/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ATP) += atp.o -r8169-objs += r8169_main.o r8169_firmware.o r8169_phy_config.o +r8169-y += r8169_main.o r8169_firmware.o r8169_phy_config.o +r8169-$(CONFIG_R8169_LEDS) += r8169_leds.o obj-$(CONFIG_R8169) += r8169.o diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 55ef8251feb5..81567fcf3957 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -8,6 +8,7 @@ * See MAINTAINERS file for support contact information. */ +#include <linux/netdevice.h> #include <linux/types.h> #include <linux/phy.h> @@ -77,3 +78,9 @@ u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp); u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr); void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, enum mac_version ver); + +void r8169_get_led_name(struct rtl8169_private *tp, int idx, + char *buf, int buf_len); +int rtl8168_get_led_mode(struct rtl8169_private *tp); +int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val); +void rtl8168_init_leds(struct net_device *ndev); diff --git a/drivers/net/ethernet/realtek/r8169_leds.c b/drivers/net/ethernet/realtek/r8169_leds.c new file mode 100644 index 000000000000..007d077edcad --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169_leds.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* r8169_leds.c: Realtek 8169/8168/8101/8125 ethernet driver. + * + * Copyright (c) 2023 Heiner Kallweit <hkallweit1@gmail.com> + * + * See MAINTAINERS file for support contact information. + */ + +#include <linux/leds.h> +#include <linux/netdevice.h> +#include <uapi/linux/uleds.h> + +#include "r8169.h" + +#define RTL8168_LED_CTRL_OPTION2 BIT(15) +#define RTL8168_LED_CTRL_ACT BIT(3) +#define RTL8168_LED_CTRL_LINK_1000 BIT(2) +#define RTL8168_LED_CTRL_LINK_100 BIT(1) +#define RTL8168_LED_CTRL_LINK_10 BIT(0) + +#define RTL8168_NUM_LEDS 3 + +#define RTL8168_SUPPORTED_MODES \ + (BIT(TRIGGER_NETDEV_LINK_1000) | BIT(TRIGGER_NETDEV_LINK_100) | \ + BIT(TRIGGER_NETDEV_LINK_10) | BIT(TRIGGER_NETDEV_RX) | \ + BIT(TRIGGER_NETDEV_TX)) + +struct r8169_led_classdev { + struct led_classdev led; + struct net_device *ndev; + int index; +}; + +#define lcdev_to_r8169_ldev(lcdev) container_of(lcdev, struct r8169_led_classdev, led) + +static int rtl8168_led_hw_control_is_supported(struct led_classdev *led_cdev, + unsigned long flags) +{ + struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev); + struct rtl8169_private *tp = netdev_priv(ldev->ndev); + int shift = ldev->index * 4; + bool rx, tx; + + if (flags & ~RTL8168_SUPPORTED_MODES) + goto nosupp; + + rx = flags & BIT(TRIGGER_NETDEV_RX); + tx = flags & BIT(TRIGGER_NETDEV_TX); + if (rx != tx) + goto nosupp; + + return 0; + +nosupp: + /* Switch LED off to indicate that mode isn't supported */ + rtl8168_led_mod_ctrl(tp, 0x000f << shift, 0); + return -EOPNOTSUPP; +} + +static int rtl8168_led_hw_control_set(struct led_classdev *led_cdev, + unsigned long flags) +{ + struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev); + struct rtl8169_private *tp = netdev_priv(ldev->ndev); + int shift = ldev->index * 4; + u16 mode = 0; + + if (flags & BIT(TRIGGER_NETDEV_LINK_10)) + mode |= RTL8168_LED_CTRL_LINK_10; + if (flags & BIT(TRIGGER_NETDEV_LINK_100)) + mode |= RTL8168_LED_CTRL_LINK_100; + if (flags & BIT(TRIGGER_NETDEV_LINK_1000)) + mode |= RTL8168_LED_CTRL_LINK_1000; + if (flags & BIT(TRIGGER_NETDEV_TX)) + mode |= RTL8168_LED_CTRL_ACT; + + return rtl8168_led_mod_ctrl(tp, 0x000f << shift, mode << shift); +} + +static int rtl8168_led_hw_control_get(struct led_classdev *led_cdev, + unsigned long *flags) +{ + struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev); + struct rtl8169_private *tp = netdev_priv(ldev->ndev); + int shift = ldev->index * 4; + int mode; + + mode = rtl8168_get_led_mode(tp); + if (mode < 0) + return mode; + + if (mode & RTL8168_LED_CTRL_OPTION2) { + rtl8168_led_mod_ctrl(tp, RTL8168_LED_CTRL_OPTION2, 0); + netdev_notice(ldev->ndev, "Deactivating unsupported Option2 LED mode\n"); + } + + mode = (mode >> shift) & 0x000f; + + if (mode & RTL8168_LED_CTRL_ACT) + *flags |= BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX); + + if (mode & RTL8168_LED_CTRL_LINK_10) + *flags |= BIT(TRIGGER_NETDEV_LINK_10); + if (mode & RTL8168_LED_CTRL_LINK_100) + *flags |= BIT(TRIGGER_NETDEV_LINK_100); + if (mode & RTL8168_LED_CTRL_LINK_1000) + *flags |= BIT(TRIGGER_NETDEV_LINK_1000); + + return 0; +} + +static struct device * + r8169_led_hw_control_get_device(struct led_classdev *led_cdev) +{ + struct r8169_led_classdev *ldev = lcdev_to_r8169_ldev(led_cdev); + + return &ldev->ndev->dev; +} + +static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev, + struct net_device *ndev, int index) +{ + struct rtl8169_private *tp = netdev_priv(ndev); + struct led_classdev *led_cdev = &ldev->led; + char led_name[LED_MAX_NAME_SIZE]; + + ldev->ndev = ndev; + ldev->index = index; + + r8169_get_led_name(tp, index, led_name, LED_MAX_NAME_SIZE); + led_cdev->name = led_name; + led_cdev->default_trigger = "netdev"; + led_cdev->hw_control_trigger = "netdev"; + led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN; + led_cdev->hw_control_is_supported = rtl8168_led_hw_control_is_supported; + led_cdev->hw_control_set = rtl8168_led_hw_control_set; + led_cdev->hw_control_get = rtl8168_led_hw_control_get; + led_cdev->hw_control_get_device = r8169_led_hw_control_get_device; + + /* ignore errors */ + devm_led_classdev_register(&ndev->dev, led_cdev); +} + +void rtl8168_init_leds(struct net_device *ndev) +{ + /* bind resource mgmt to netdev */ + struct device *dev = &ndev->dev; + struct r8169_led_classdev *leds; + int i; + + leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL); + if (!leds) + return; + + for (i = 0; i < RTL8168_NUM_LEDS; i++) + rtl8168_setup_ldev(leds + i, ndev, i); +} diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c07f82ca9755..349afb157fee 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -285,6 +285,7 @@ enum rtl8168_8101_registers { }; enum rtl8168_registers { + LED_CTRL = 0x18, LED_FREQ = 0x1a, EEE_LED = 0x1b, ERIDR = 0x70, @@ -616,6 +617,7 @@ struct rtl8169_private { raw_spinlock_t config25_lock; raw_spinlock_t mac_ocp_lock; + struct mutex led_lock; /* serialize LED ctrl RMW access */ raw_spinlock_t cfg9346_usage_lock; int cfg9346_usage_count; @@ -788,6 +790,62 @@ static const struct rtl_cond name = { \ \ static bool name ## _check(struct rtl8169_private *tp) +int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val) +{ + struct device *dev = tp_to_dev(tp); + int ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + mutex_lock(&tp->led_lock); + RTL_W16(tp, LED_CTRL, (RTL_R16(tp, LED_CTRL) & ~mask) | val); + mutex_unlock(&tp->led_lock); + + pm_runtime_put_sync(dev); + + return 0; +} + +int rtl8168_get_led_mode(struct rtl8169_private *tp) +{ + struct device *dev = tp_to_dev(tp); + int ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + ret = RTL_R16(tp, LED_CTRL); + + pm_runtime_put_sync(dev); + + return ret; +} + +void r8169_get_led_name(struct rtl8169_private *tp, int idx, + char *buf, int buf_len) +{ + struct pci_dev *pdev = tp->pci_dev; + char pdom[8], pfun[8]; + int domain; + + domain = pci_domain_nr(pdev->bus); + if (domain) + snprintf(pdom, sizeof(pdom), "P%d", domain); + else + pdom[0] = '\0'; + + if (pdev->multifunction) + snprintf(pfun, sizeof(pfun), "f%d", PCI_FUNC(pdev->devfn)); + else + pfun[0] = '\0'; + + snprintf(buf, buf_len, "en%sp%ds%d%s-%d::lan", pdom, pdev->bus->number, + PCI_SLOT(pdev->devfn), pfun, idx); +} + static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int type) { /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ @@ -5141,6 +5199,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) raw_spin_lock_init(&tp->cfg9346_usage_lock); raw_spin_lock_init(&tp->config25_lock); raw_spin_lock_init(&tp->mac_ocp_lock); + mutex_init(&tp->led_lock); dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, struct pcpu_sw_netstats); @@ -5297,6 +5356,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; + if (IS_ENABLED(CONFIG_R8169_LEDS) && + tp->mac_version > RTL_GIGA_MAC_VER_06 && + tp->mac_version < RTL_GIGA_MAC_VER_61) + rtl8168_init_leds(dev); + netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n", rtl_chip_infos[chipset].name, dev->dev_addr, xid, tp->irq); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 540f6a4ec0b8..f05bd757dfe5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -237,7 +237,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv) */ ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS); - if (priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN) + if (!(priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN)) return; num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index e60b557d59b9..be01450c20dc 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -134,14 +134,16 @@ config TI_K3_AM65_CPTS protocol, Ethernet Enhanced Scheduled Traffic Operations (CPTS_ESTFn) and PCIe Subsystem Precision Time Measurement (PTM). -config TI_AM65_CPSW_TAS - bool "Enable TAS offload in AM65 CPSW" +config TI_AM65_CPSW_QOS + bool "Enable QoS offload features in AM65 CPSW" depends on TI_K3_AM65_CPSW_NUSS && NET_SCH_TAPRIO && TI_K3_AM65_CPTS help - Say y here to support Time Aware Shaper(TAS) offload in AM65 CPSW. - AM65 CPSW hardware supports Enhanced Scheduled Traffic (EST) - defined in IEEE 802.1Q 2018. The EST scheduler runs on CPTS and the - TAS/EST schedule is updated in the Fetch RAM memory of the CPSW. + This option enables QoS offload features in AM65 CPSW like + Time Aware Shaper (TAS) / Enhanced Scheduled Traffic (EST), + MQPRIO qdisc offload and Frame-Preemption MAC Merge / Interspersing + Express Traffic (IET). + The EST scheduler runs on CPTS and the TAS/EST schedule is + updated in the Fetch RAM memory of the CPSW. config TI_KEYSTONE_NETCP tristate "TI Keystone NETCP Core Support" diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 27de1d697134..d8590304f3df 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -26,7 +26,8 @@ keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale. obj-$(CONFIG_TI_K3_CPPI_DESC_POOL) += k3-cppi-desc-pool.o obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o -ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o am65-cpsw-qos.o +ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o +ti-am65-cpsw-nuss-$(CONFIG_TI_AM65_CPSW_QOS) += am65-cpsw-qos.o ti-am65-cpsw-nuss-$(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV) += am65-cpsw-switchdev.o obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index b9e1d568604b..35fceba01ea4 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -11,6 +11,7 @@ #include <linux/pm_runtime.h> #include "am65-cpsw-nuss.h" +#include "am65-cpsw-qos.h" #include "cpsw_ale.h" #include "am65-cpts.h" @@ -670,6 +671,9 @@ static void am65_cpsw_get_eth_mac_stats(struct net_device *ndev, stats = port->stat_base; + if (s->src != ETHTOOL_MAC_STATS_SRC_AGGREGATE) + return; + s->FramesTransmittedOK = readl_relaxed(&stats->tx_good_frames); s->SingleCollisionFrames = readl_relaxed(&stats->tx_single_coll_frames); s->MultipleCollisionFrames = readl_relaxed(&stats->tx_mult_coll_frames); @@ -740,6 +744,240 @@ static int am65_cpsw_set_ethtool_priv_flags(struct net_device *ndev, u32 flags) return 0; } +static void am65_cpsw_port_iet_rx_enable(struct am65_cpsw_port *port, bool enable) +{ + u32 val; + + val = readl(port->port_base + AM65_CPSW_PN_REG_CTL); + if (enable) + val |= AM65_CPSW_PN_CTL_IET_PORT_EN; + else + val &= ~AM65_CPSW_PN_CTL_IET_PORT_EN; + + writel(val, port->port_base + AM65_CPSW_PN_REG_CTL); + am65_cpsw_iet_common_enable(port->common); +} + +static void am65_cpsw_port_iet_tx_enable(struct am65_cpsw_port *port, bool enable) +{ + u32 val; + + val = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + if (enable) + val |= AM65_CPSW_PN_IET_MAC_PENABLE; + else + val &= ~AM65_CPSW_PN_IET_MAC_PENABLE; + + writel(val, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); +} + +static int am65_cpsw_get_mm(struct net_device *ndev, struct ethtool_mm_state *state) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_ndev_priv *priv = netdev_priv(ndev); + u32 port_ctrl, iet_ctrl, iet_status; + u32 add_frag_size; + + if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_QOS)) + return -EOPNOTSUPP; + + mutex_lock(&priv->mm_lock); + + iet_ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + port_ctrl = readl(port->port_base + AM65_CPSW_PN_REG_CTL); + + state->tx_enabled = !!(iet_ctrl & AM65_CPSW_PN_IET_MAC_PENABLE); + state->pmac_enabled = !!(port_ctrl & AM65_CPSW_PN_CTL_IET_PORT_EN); + + iet_status = readl(port->port_base + AM65_CPSW_PN_REG_IET_STATUS); + + if (iet_ctrl & AM65_CPSW_PN_IET_MAC_DISABLEVERIFY) + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; + else if (iet_status & AM65_CPSW_PN_MAC_VERIFIED) + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED; + else if (iet_status & AM65_CPSW_PN_MAC_VERIFY_FAIL) + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_FAILED; + else + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_UNKNOWN; + + add_frag_size = AM65_CPSW_PN_IET_MAC_GET_ADDFRAGSIZE(iet_ctrl); + state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(add_frag_size); + + /* Errata i2208: RX min fragment size cannot be less than 124 */ + state->rx_min_frag_size = 124; + + /* FPE active if common tx_enabled and verification success or disabled (forced) */ + state->tx_active = state->tx_enabled && + (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED || + state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED); + state->verify_enabled = !(iet_ctrl & AM65_CPSW_PN_IET_MAC_DISABLEVERIFY); + + state->verify_time = port->qos.iet.verify_time_ms; + + /* 802.3-2018 clause 30.14.1.6, says that the aMACMergeVerifyTime + * variable has a range between 1 and 128 ms inclusive. Limit to that. + */ + state->max_verify_time = 128; + + mutex_unlock(&priv->mm_lock); + + return 0; +} + +static int am65_cpsw_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_ndev_priv *priv = netdev_priv(ndev); + struct am65_cpsw_iet *iet = &port->qos.iet; + u32 val, add_frag_size; + int err; + + if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_QOS)) + return -EOPNOTSUPP; + + err = ethtool_mm_frag_size_min_to_add(cfg->tx_min_frag_size, &add_frag_size, extack); + if (err) + return err; + + mutex_lock(&priv->mm_lock); + + if (cfg->pmac_enabled) { + /* change TX & RX FIFO MAX_BLKS as per TRM recommendation */ + if (!iet->original_max_blks) + iet->original_max_blks = readl(port->port_base + AM65_CPSW_PN_REG_MAX_BLKS); + + writel(AM65_CPSW_PN_TX_RX_MAX_BLKS_IET, + port->port_base + AM65_CPSW_PN_REG_MAX_BLKS); + } else if (iet->original_max_blks) { + /* restore RX & TX FIFO MAX_BLKS */ + writel(iet->original_max_blks, + port->port_base + AM65_CPSW_PN_REG_MAX_BLKS); + } + + am65_cpsw_port_iet_rx_enable(port, cfg->pmac_enabled); + am65_cpsw_port_iet_tx_enable(port, cfg->tx_enabled); + + val = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + if (cfg->verify_enabled) { + val &= ~AM65_CPSW_PN_IET_MAC_DISABLEVERIFY; + /* Reset Verify state machine. Verification won't start here. + * Verification will be done once link-up. + */ + val |= AM65_CPSW_PN_IET_MAC_LINKFAIL; + } else { + val |= AM65_CPSW_PN_IET_MAC_DISABLEVERIFY; + /* Clear LINKFAIL to allow verify/response packets */ + val &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; + } + + val &= ~AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_MASK; + val |= AM65_CPSW_PN_IET_MAC_SET_ADDFRAGSIZE(add_frag_size); + writel(val, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + + /* verify_timeout_count can only be set at valid link */ + port->qos.iet.verify_time_ms = cfg->verify_time; + + /* enable/disable preemption based on link status */ + am65_cpsw_iet_commit_preemptible_tcs(port); + + mutex_unlock(&priv->mm_lock); + + return 0; +} + +static void am65_cpsw_get_mm_stats(struct net_device *ndev, + struct ethtool_mm_stats *s) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + void __iomem *base = port->stat_base; + + s->MACMergeFrameAssOkCount = readl(base + AM65_CPSW_STATN_IET_RX_ASSEMBLY_OK); + s->MACMergeFrameAssErrorCount = readl(base + AM65_CPSW_STATN_IET_RX_ASSEMBLY_ERROR); + s->MACMergeFrameSmdErrorCount = readl(base + AM65_CPSW_STATN_IET_RX_SMD_ERROR); + /* CPSW Functional Spec states: + * "The IET stat aMACMergeFragCountRx is derived by adding the + * Receive Assembly Error count to this value. i.e. AM65_CPSW_STATN_IET_RX_FRAG" + */ + s->MACMergeFragCountRx = readl(base + AM65_CPSW_STATN_IET_RX_FRAG) + s->MACMergeFrameAssErrorCount; + s->MACMergeFragCountTx = readl(base + AM65_CPSW_STATN_IET_TX_FRAG); + s->MACMergeHoldCount = readl(base + AM65_CPSW_STATN_IET_TX_HOLD); +} + +static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_tx_chn *tx_chn; + + tx_chn = &common->tx_chns[0]; + + coal->rx_coalesce_usecs = common->rx_pace_timeout / 1000; + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000; + + return 0; +} + +static int am65_cpsw_get_per_queue_coalesce(struct net_device *ndev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_tx_chn *tx_chn; + + if (queue >= AM65_CPSW_MAX_TX_QUEUES) + return -EINVAL; + + tx_chn = &common->tx_chns[queue]; + + coal->tx_coalesce_usecs = tx_chn->tx_pace_timeout / 1000; + + return 0; +} + +static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_tx_chn *tx_chn; + + tx_chn = &common->tx_chns[0]; + + if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20) + return -EINVAL; + + if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) + return -EINVAL; + + common->rx_pace_timeout = coal->rx_coalesce_usecs * 1000; + tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000; + + return 0; +} + +static int am65_cpsw_set_per_queue_coalesce(struct net_device *ndev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_tx_chn *tx_chn; + + if (queue >= AM65_CPSW_MAX_TX_QUEUES) + return -EINVAL; + + tx_chn = &common->tx_chns[queue]; + + if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) { + dev_info(common->dev, "defaulting to min value of 20us for tx-usecs for tx-%u\n", + queue); + coal->tx_coalesce_usecs = 20; + } + + tx_chn->tx_pace_timeout = coal->tx_coalesce_usecs * 1000; + + return 0; +} + const struct ethtool_ops am65_cpsw_ethtool_ops_slave = { .begin = am65_cpsw_ethtool_op_begin, .complete = am65_cpsw_ethtool_op_complete, @@ -758,6 +996,11 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = { .get_ts_info = am65_cpsw_get_ethtool_ts_info, .get_priv_flags = am65_cpsw_get_ethtool_priv_flags, .set_priv_flags = am65_cpsw_set_ethtool_priv_flags, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .get_coalesce = am65_cpsw_get_coalesce, + .set_coalesce = am65_cpsw_set_coalesce, + .get_per_queue_coalesce = am65_cpsw_get_per_queue_coalesce, + .set_per_queue_coalesce = am65_cpsw_set_per_queue_coalesce, .get_link = ethtool_op_get_link, .get_link_ksettings = am65_cpsw_get_link_ksettings, @@ -769,4 +1012,7 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = { .get_eee = am65_cpsw_get_eee, .set_eee = am65_cpsw_set_eee, .nway_reset = am65_cpsw_nway_reset, + .get_mm = am65_cpsw_get_mm, + .set_mm = am65_cpsw_set_mm, + .get_mm_stats = am65_cpsw_get_mm_stats, }; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 7651f90f51f2..faa0561e988e 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -596,8 +596,10 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) msecs_to_jiffies(1000)); if (!i) dev_err(common->dev, "tx timeout\n"); - for (i = 0; i < common->tx_ch_num; i++) + for (i = 0; i < common->tx_ch_num; i++) { napi_disable(&common->tx_chns[i].napi_tx); + hrtimer_cancel(&common->tx_chns[i].tx_hrtimer); + } for (i = 0; i < common->tx_ch_num; i++) { k3_udma_glue_reset_tx_chn(common->tx_chns[i].tx_chn, @@ -616,6 +618,7 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) } napi_disable(&common->napi_rx); + hrtimer_cancel(&common->rx_hrtimer); for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, i, @@ -885,6 +888,15 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, return ret; } +static enum hrtimer_restart am65_cpsw_nuss_rx_timer_callback(struct hrtimer *timer) +{ + struct am65_cpsw_common *common = + container_of(timer, struct am65_cpsw_common, rx_hrtimer); + + enable_irq(common->rx_chns.irq); + return HRTIMER_NORESTART; +} + static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) { struct am65_cpsw_common *common = am65_cpsw_napi_to_common(napi_rx); @@ -912,7 +924,13 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { if (common->rx_irq_disabled) { common->rx_irq_disabled = false; - enable_irq(common->rx_chns.irq); + if (unlikely(common->rx_pace_timeout)) { + hrtimer_start(&common->rx_hrtimer, + ns_to_ktime(common->rx_pace_timeout), + HRTIMER_MODE_REL_PINNED); + } else { + enable_irq(common->rx_chns.irq); + } } } @@ -968,7 +986,7 @@ static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_d } static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, - int chn, unsigned int budget) + int chn, unsigned int budget, bool *tdown) { struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; @@ -991,6 +1009,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, if (cppi5_desc_is_tdcm(desc_dma)) { if (atomic_dec_and_test(&common->tdown_cnt)) complete(&common->tdown_complete); + *tdown = true; break; } @@ -1013,7 +1032,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, } static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, - int chn, unsigned int budget) + int chn, unsigned int budget, bool *tdown) { struct device *dev = common->dev; struct am65_cpsw_tx_chn *tx_chn; @@ -1034,6 +1053,7 @@ static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, if (cppi5_desc_is_tdcm(desc_dma)) { if (atomic_dec_and_test(&common->tdown_cnt)) complete(&common->tdown_complete); + *tdown = true; break; } @@ -1059,21 +1079,40 @@ static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common, return num_tx; } +static enum hrtimer_restart am65_cpsw_nuss_tx_timer_callback(struct hrtimer *timer) +{ + struct am65_cpsw_tx_chn *tx_chns = + container_of(timer, struct am65_cpsw_tx_chn, tx_hrtimer); + + enable_irq(tx_chns->irq); + return HRTIMER_NORESTART; +} + static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) { struct am65_cpsw_tx_chn *tx_chn = am65_cpsw_napi_to_tx_chn(napi_tx); + bool tdown = false; int num_tx; if (AM65_CPSW_IS_CPSW2G(tx_chn->common)) - num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id, budget); + num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id, + budget, &tdown); else - num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget); + num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, + tx_chn->id, budget, &tdown); if (num_tx >= budget) return budget; - if (napi_complete_done(napi_tx, num_tx)) - enable_irq(tx_chn->irq); + if (napi_complete_done(napi_tx, num_tx)) { + if (unlikely(tx_chn->tx_pace_timeout && !tdown)) { + hrtimer_start(&tx_chn->tx_hrtimer, + ns_to_ktime(tx_chn->tx_pace_timeout), + HRTIMER_MODE_REL_PINNED); + } else { + enable_irq(tx_chn->irq); + } + } return 0; } @@ -1705,6 +1744,8 @@ static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx, am65_cpsw_nuss_tx_poll); + hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + tx_chn->tx_hrtimer.function = &am65_cpsw_nuss_tx_timer_callback; ret = devm_request_irq(dev, tx_chn->irq, am65_cpsw_nuss_tx_irq, @@ -1930,6 +1971,8 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) netif_napi_add(common->dma_ndev, &common->napi_rx, am65_cpsw_nuss_rx_poll); + hrtimer_init(&common->rx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + common->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback; ret = devm_request_irq(dev, rx_chn->irq, am65_cpsw_nuss_rx_irq, @@ -2127,6 +2170,9 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) dev_err(dev, "Use random MAC address\n"); } } + + /* Reset all Queue priorities to 0 */ + writel(0, port->port_base + AM65_CPSW_PN_REG_TX_PRI_MAP); } of_node_put(node); @@ -2191,6 +2237,8 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) ndev_priv = netdev_priv(port->ndev); ndev_priv->port = port; ndev_priv->msg_enable = AM65_CPSW_DEBUG; + mutex_init(&ndev_priv->mm_lock); + port->qos.link_speed = SPEED_UNKNOWN; SET_NETDEV_DEV(port->ndev, dev); eth_hw_addr_set(port->ndev, port->slave.mac_addr); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index f3dad2ab9828..7da0492dc091 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -75,6 +75,8 @@ struct am65_cpsw_tx_chn { struct k3_cppi_desc_pool *desc_pool; struct k3_udma_glue_tx_channel *tx_chn; spinlock_t lock; /* protect TX rings in multi-port mode */ + struct hrtimer tx_hrtimer; + unsigned long tx_pace_timeout; int irq; u32 id; u32 descs_num; @@ -138,6 +140,8 @@ struct am65_cpsw_common { struct napi_struct napi_rx; bool rx_irq_disabled; + struct hrtimer rx_hrtimer; + unsigned long rx_pace_timeout; u32 nuss_ver; u32 cpsw_ver; @@ -145,6 +149,7 @@ struct am65_cpsw_common { bool pf_p0_rx_ptype_rrobin; struct am65_cpts *cpts; int est_enabled; + bool iet_enabled; bool is_emac_mode; u16 br_members; @@ -170,6 +175,10 @@ struct am65_cpsw_ndev_priv { struct am65_cpsw_port *port; struct am65_cpsw_ndev_stats __percpu *stats; bool offload_fwd_mark; + /* Serialize access to MAC Merge state between ethtool requests + * and link state updates + */ + struct mutex mm_lock; }; #define am65_ndev_to_priv(ndev) \ diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 9ac2ff05d501..816e73a3d6e4 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -4,10 +4,13 @@ * * quality of service module includes: * Enhanced Scheduler Traffic (EST - P802.1Qbv/D2.2) + * Interspersed Express Traffic (IET - P802.3br/D2.0) */ #include <linux/pm_runtime.h> +#include <linux/math.h> #include <linux/time.h> +#include <linux/units.h> #include <net/pkt_cls.h> #include "am65-cpsw-nuss.h" @@ -15,40 +18,7 @@ #include "am65-cpts.h" #include "cpsw_ale.h" -#define AM65_CPSW_REG_CTL 0x004 -#define AM65_CPSW_PN_REG_CTL 0x004 -#define AM65_CPSW_PN_REG_FIFO_STATUS 0x050 -#define AM65_CPSW_PN_REG_EST_CTL 0x060 -#define AM65_CPSW_PN_REG_PRI_CIR(pri) (0x140 + 4 * (pri)) - -/* AM65_CPSW_REG_CTL register fields */ -#define AM65_CPSW_CTL_EST_EN BIT(18) - -/* AM65_CPSW_PN_REG_CTL register fields */ -#define AM65_CPSW_PN_CTL_EST_PORT_EN BIT(17) - -/* AM65_CPSW_PN_REG_EST_CTL register fields */ -#define AM65_CPSW_PN_EST_ONEBUF BIT(0) -#define AM65_CPSW_PN_EST_BUFSEL BIT(1) -#define AM65_CPSW_PN_EST_TS_EN BIT(2) -#define AM65_CPSW_PN_EST_TS_FIRST BIT(3) -#define AM65_CPSW_PN_EST_ONEPRI BIT(4) -#define AM65_CPSW_PN_EST_TS_PRI_MSK GENMASK(7, 5) - -/* AM65_CPSW_PN_REG_FIFO_STATUS register fields */ -#define AM65_CPSW_PN_FST_TX_PRI_ACTIVE_MSK GENMASK(7, 0) -#define AM65_CPSW_PN_FST_TX_E_MAC_ALLOW_MSK GENMASK(15, 8) -#define AM65_CPSW_PN_FST_EST_CNT_ERR BIT(16) -#define AM65_CPSW_PN_FST_EST_ADD_ERR BIT(17) -#define AM65_CPSW_PN_FST_EST_BUFACT BIT(18) - -/* EST FETCH COMMAND RAM */ -#define AM65_CPSW_FETCH_RAM_CMD_NUM 0x80 -#define AM65_CPSW_FETCH_CNT_MSK GENMASK(21, 8) -#define AM65_CPSW_FETCH_CNT_MAX (AM65_CPSW_FETCH_CNT_MSK >> 8) -#define AM65_CPSW_FETCH_CNT_OFFSET 8 -#define AM65_CPSW_FETCH_ALLOW_MSK GENMASK(7, 0) -#define AM65_CPSW_FETCH_ALLOW_MAX AM65_CPSW_FETCH_ALLOW_MSK +#define TO_MBPS(x) DIV_ROUND_UP((x), BYTES_PER_MBIT) enum timer_act { TACT_PROG, /* need program timer */ @@ -56,6 +26,412 @@ enum timer_act { TACT_SKIP_PROG, /* just buffer can be updated */ }; +static void am65_cpsw_iet_change_preemptible_tcs(struct am65_cpsw_port *port, u8 preemptible_tcs); + +static u32 +am65_cpsw_qos_tx_rate_calc(u32 rate_mbps, unsigned long bus_freq) +{ + u32 ir; + + bus_freq /= 1000000; + ir = DIV_ROUND_UP(((u64)rate_mbps * 32768), bus_freq); + return ir; +} + +static void am65_cpsw_tx_pn_shaper_reset(struct am65_cpsw_port *port) +{ + int prio; + + for (prio = 0; prio < AM65_CPSW_PN_FIFO_PRIO_NUM; prio++) { + writel(0, port->port_base + AM65_CPSW_PN_REG_PRI_CIR(prio)); + writel(0, port->port_base + AM65_CPSW_PN_REG_PRI_EIR(prio)); + } +} + +static void am65_cpsw_tx_pn_shaper_apply(struct am65_cpsw_port *port) +{ + struct am65_cpsw_mqprio *p_mqprio = &port->qos.mqprio; + struct am65_cpsw_common *common = port->common; + struct tc_mqprio_qopt_offload *mqprio; + bool enable, shaper_susp = false; + u32 rate_mbps; + int tc, prio; + + mqprio = &p_mqprio->mqprio_hw; + /* takes care of no link case as well */ + if (p_mqprio->max_rate_total > port->qos.link_speed) + shaper_susp = true; + + am65_cpsw_tx_pn_shaper_reset(port); + + enable = p_mqprio->shaper_en && !shaper_susp; + if (!enable) + return; + + /* Rate limit is specified per Traffic Class but + * for CPSW, rate limit can be applied per priority + * at port FIFO. + * + * We have assigned the same priority (TCn) to all queues + * of a Traffic Class so they share the same shaper + * bandwidth. + */ + for (tc = 0; tc < mqprio->qopt.num_tc; tc++) { + prio = tc; + + rate_mbps = TO_MBPS(mqprio->min_rate[tc]); + rate_mbps = am65_cpsw_qos_tx_rate_calc(rate_mbps, + common->bus_freq); + writel(rate_mbps, + port->port_base + AM65_CPSW_PN_REG_PRI_CIR(prio)); + + rate_mbps = 0; + + if (mqprio->max_rate[tc]) { + rate_mbps = mqprio->max_rate[tc] - mqprio->min_rate[tc]; + rate_mbps = TO_MBPS(rate_mbps); + rate_mbps = am65_cpsw_qos_tx_rate_calc(rate_mbps, + common->bus_freq); + } + + writel(rate_mbps, + port->port_base + AM65_CPSW_PN_REG_PRI_EIR(prio)); + } +} + +static int am65_cpsw_mqprio_verify_shaper(struct am65_cpsw_port *port, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct am65_cpsw_mqprio *p_mqprio = &port->qos.mqprio; + struct netlink_ext_ack *extack = mqprio->extack; + u64 min_rate_total = 0, max_rate_total = 0; + u32 min_rate_msk = 0, max_rate_msk = 0; + bool has_min_rate, has_max_rate; + int num_tc, i; + + if (!(mqprio->flags & TC_MQPRIO_F_SHAPER)) + return 0; + + if (mqprio->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return 0; + + has_min_rate = !!(mqprio->flags & TC_MQPRIO_F_MIN_RATE); + has_max_rate = !!(mqprio->flags & TC_MQPRIO_F_MAX_RATE); + + if (!has_min_rate && has_max_rate) { + NL_SET_ERR_MSG_MOD(extack, "min_rate is required with max_rate"); + return -EOPNOTSUPP; + } + + if (!has_min_rate) + return 0; + + num_tc = mqprio->qopt.num_tc; + + for (i = num_tc - 1; i >= 0; i--) { + u32 ch_msk; + + if (mqprio->min_rate[i]) + min_rate_msk |= BIT(i); + min_rate_total += mqprio->min_rate[i]; + + if (has_max_rate) { + if (mqprio->max_rate[i]) + max_rate_msk |= BIT(i); + max_rate_total += mqprio->max_rate[i]; + + if (!mqprio->min_rate[i] && mqprio->max_rate[i]) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "TX tc%d rate max>0 but min=0", + i); + return -EINVAL; + } + + if (mqprio->max_rate[i] && + mqprio->max_rate[i] < mqprio->min_rate[i]) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "TX tc%d rate min(%llu)>max(%llu)", + i, mqprio->min_rate[i], + mqprio->max_rate[i]); + return -EINVAL; + } + } + + ch_msk = GENMASK(num_tc - 1, i); + if ((min_rate_msk & BIT(i)) && (min_rate_msk ^ ch_msk)) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Min rate must be set sequentially hi->lo tx_rate_msk%x", + min_rate_msk); + return -EINVAL; + } + + if ((max_rate_msk & BIT(i)) && (max_rate_msk ^ ch_msk)) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Max rate must be set sequentially hi->lo tx_rate_msk%x", + max_rate_msk); + return -EINVAL; + } + } + + min_rate_total = TO_MBPS(min_rate_total); + max_rate_total = TO_MBPS(max_rate_total); + + p_mqprio->shaper_en = true; + p_mqprio->max_rate_total = max_t(u64, min_rate_total, max_rate_total); + + return 0; +} + +static void am65_cpsw_reset_tc_mqprio(struct net_device *ndev) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_mqprio *p_mqprio = &port->qos.mqprio; + + p_mqprio->shaper_en = false; + p_mqprio->max_rate_total = 0; + + am65_cpsw_tx_pn_shaper_reset(port); + netdev_reset_tc(ndev); + + /* Reset all Queue priorities to 0 */ + writel(0, port->port_base + AM65_CPSW_PN_REG_TX_PRI_MAP); + + am65_cpsw_iet_change_preemptible_tcs(port, 0); +} + +static int am65_cpsw_setup_mqprio(struct net_device *ndev, void *type_data) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_mqprio *p_mqprio = &port->qos.mqprio; + struct tc_mqprio_qopt_offload *mqprio = type_data; + struct am65_cpsw_common *common = port->common; + struct tc_mqprio_qopt *qopt = &mqprio->qopt; + int i, tc, offset, count, prio, ret; + u8 num_tc = qopt->num_tc; + u32 tx_prio_map = 0; + + memcpy(&p_mqprio->mqprio_hw, mqprio, sizeof(*mqprio)); + + ret = pm_runtime_get_sync(common->dev); + if (ret < 0) { + pm_runtime_put_noidle(common->dev); + return ret; + } + + if (!num_tc) { + am65_cpsw_reset_tc_mqprio(ndev); + ret = 0; + goto exit_put; + } + + ret = am65_cpsw_mqprio_verify_shaper(port, mqprio); + if (ret) + goto exit_put; + + netdev_set_num_tc(ndev, num_tc); + + /* Multiple Linux priorities can map to a Traffic Class + * A Traffic Class can have multiple contiguous Queues, + * Queues get mapped to Channels (thread_id), + * if not VLAN tagged, thread_id is used as packet_priority + * if VLAN tagged. VLAN priority is used as packet_priority + * packet_priority gets mapped to header_priority in p0_rx_pri_map, + * header_priority gets mapped to switch_priority in pn_tx_pri_map. + * As p0_rx_pri_map is left at defaults (0x76543210), we can + * assume that Queue_n gets mapped to header_priority_n. We can then + * set the switch priority in pn_tx_pri_map. + */ + + for (tc = 0; tc < num_tc; tc++) { + prio = tc; + + /* For simplicity we assign the same priority (TCn) to + * all queues of a Traffic Class. + */ + for (i = qopt->offset[tc]; i < qopt->offset[tc] + qopt->count[tc]; i++) + tx_prio_map |= prio << (4 * i); + + count = qopt->count[tc]; + offset = qopt->offset[tc]; + netdev_set_tc_queue(ndev, tc, count, offset); + } + + writel(tx_prio_map, port->port_base + AM65_CPSW_PN_REG_TX_PRI_MAP); + + am65_cpsw_tx_pn_shaper_apply(port); + am65_cpsw_iet_change_preemptible_tcs(port, mqprio->preemptible_tcs); + +exit_put: + pm_runtime_put(common->dev); + + return ret; +} + +static int am65_cpsw_iet_set_verify_timeout_count(struct am65_cpsw_port *port) +{ + int verify_time_ms = port->qos.iet.verify_time_ms; + u32 val; + + /* The number of wireside clocks contained in the verify + * timeout counter. The default is 0x1312d0 + * (10ms at 125Mhz in 1G mode). + */ + val = 125 * HZ_PER_MHZ; /* assuming 125MHz wireside clock */ + + val /= MILLIHZ_PER_HZ; /* count per ms timeout */ + val *= verify_time_ms; /* count for timeout ms */ + + if (val > AM65_CPSW_PN_MAC_VERIFY_CNT_MASK) + return -EINVAL; + + writel(val, port->port_base + AM65_CPSW_PN_REG_IET_VERIFY); + + return 0; +} + +static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) +{ + u32 ctrl, status; + int try; + + try = 20; + do { + /* Reset the verify state machine by writing 1 + * to LINKFAIL + */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + + /* Clear MAC_LINKFAIL bit to start Verify. */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + + msleep(port->qos.iet.verify_time_ms); + + status = readl(port->port_base + AM65_CPSW_PN_REG_IET_STATUS); + if (status & AM65_CPSW_PN_MAC_VERIFIED) + return 0; + + if (status & AM65_CPSW_PN_MAC_VERIFY_FAIL) { + netdev_dbg(port->ndev, + "MAC Merge verify failed, trying again\n"); + continue; + } + + if (status & AM65_CPSW_PN_MAC_RESPOND_ERR) { + netdev_dbg(port->ndev, "MAC Merge respond error\n"); + return -ENODEV; + } + + if (status & AM65_CPSW_PN_MAC_VERIFY_ERR) { + netdev_dbg(port->ndev, "MAC Merge verify error\n"); + return -ENODEV; + } + } while (try-- > 0); + + netdev_dbg(port->ndev, "MAC Merge verify timeout\n"); + return -ETIMEDOUT; +} + +static void am65_cpsw_iet_set_preempt_mask(struct am65_cpsw_port *port, u8 preemptible_tcs) +{ + u32 val; + + val = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + val &= ~AM65_CPSW_PN_IET_MAC_PREMPT_MASK; + val |= AM65_CPSW_PN_IET_MAC_SET_PREEMPT(preemptible_tcs); + writel(val, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); +} + +/* enable common IET_ENABLE only if at least 1 port has rx IET enabled. + * UAPI doesn't allow tx enable without rx enable. + */ +void am65_cpsw_iet_common_enable(struct am65_cpsw_common *common) +{ + struct am65_cpsw_port *port; + bool rx_enable = false; + u32 val; + int i; + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + val = readl(port->port_base + AM65_CPSW_PN_REG_CTL); + rx_enable = !!(val & AM65_CPSW_PN_CTL_IET_PORT_EN); + if (rx_enable) + break; + } + + val = readl(common->cpsw_base + AM65_CPSW_REG_CTL); + + if (rx_enable) + val |= AM65_CPSW_CTL_IET_EN; + else + val &= ~AM65_CPSW_CTL_IET_EN; + + writel(val, common->cpsw_base + AM65_CPSW_REG_CTL); + common->iet_enabled = rx_enable; +} + +/* CPSW does not have an IRQ to notify changes to the MAC Merge TX status + * (active/inactive), but the preemptible traffic classes should only be + * committed to hardware once TX is active. Resort to polling. + */ +void am65_cpsw_iet_commit_preemptible_tcs(struct am65_cpsw_port *port) +{ + u8 preemptible_tcs; + int err; + u32 val; + + if (port->qos.link_speed == SPEED_UNKNOWN) + return; + + val = readl(port->port_base + AM65_CPSW_PN_REG_CTL); + if (!(val & AM65_CPSW_PN_CTL_IET_PORT_EN)) + return; + + /* update common IET enable */ + am65_cpsw_iet_common_enable(port->common); + + /* update verify count */ + err = am65_cpsw_iet_set_verify_timeout_count(port); + if (err) { + netdev_err(port->ndev, "couldn't set verify count: %d\n", err); + return; + } + + val = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + if (!(val & AM65_CPSW_PN_IET_MAC_DISABLEVERIFY)) { + err = am65_cpsw_iet_verify_wait(port); + if (err) + return; + } + + preemptible_tcs = port->qos.iet.preemptible_tcs; + am65_cpsw_iet_set_preempt_mask(port, preemptible_tcs); +} + +static void am65_cpsw_iet_change_preemptible_tcs(struct am65_cpsw_port *port, u8 preemptible_tcs) +{ + struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + + port->qos.iet.preemptible_tcs = preemptible_tcs; + mutex_lock(&priv->mm_lock); + am65_cpsw_iet_commit_preemptible_tcs(port); + mutex_unlock(&priv->mm_lock); +} + +static void am65_cpsw_iet_link_state_update(struct net_device *ndev) +{ + struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + mutex_lock(&priv->mm_lock); + am65_cpsw_iet_commit_preemptible_tcs(port); + mutex_unlock(&priv->mm_lock); +} + static int am65_cpsw_port_est_enabled(struct am65_cpsw_port *port) { return port->qos.est_oper || port->qos.est_admin; @@ -428,7 +804,7 @@ static void am65_cpsw_stop_est(struct net_device *ndev) am65_cpsw_timer_stop(ndev); } -static void am65_cpsw_purge_est(struct net_device *ndev) +static void am65_cpsw_taprio_destroy(struct net_device *ndev) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); @@ -439,31 +815,74 @@ static void am65_cpsw_purge_est(struct net_device *ndev) port->qos.est_oper = NULL; port->qos.est_admin = NULL; + + am65_cpsw_reset_tc_mqprio(ndev); } -static int am65_cpsw_configure_taprio(struct net_device *ndev, - struct am65_cpsw_est *est_new) +static void am65_cpsw_cp_taprio(struct tc_taprio_qopt_offload *from, + struct tc_taprio_qopt_offload *to) +{ + int i; + + *to = *from; + for (i = 0; i < from->num_entries; i++) + to->entries[i] = from->entries[i]; +} + +static int am65_cpsw_taprio_replace(struct net_device *ndev, + struct tc_taprio_qopt_offload *taprio) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct netlink_ext_ack *extack = taprio->mqprio.extack; + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); struct am65_cpts *cpts = common->cpts; - int ret = 0, tact = TACT_PROG; + struct am65_cpsw_est *est_new; + int ret, tact; - am65_cpsw_est_update_state(ndev); + if (!netif_running(ndev)) { + NL_SET_ERR_MSG_MOD(extack, "interface is down, link speed unknown"); + return -ENETDOWN; + } - if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) { - am65_cpsw_stop_est(ndev); - return ret; + if (common->pf_p0_rx_ptype_rrobin) { + NL_SET_ERR_MSG_MOD(extack, + "p0-rx-ptype-rrobin flag conflicts with taprio qdisc"); + return -EINVAL; } + if (port->qos.link_speed == SPEED_UNKNOWN) + return -ENOLINK; + + if (taprio->cycle_time_extension) { + NL_SET_ERR_MSG_MOD(extack, + "cycle time extension not supported"); + return -EOPNOTSUPP; + } + + est_new = devm_kzalloc(&ndev->dev, + struct_size(est_new, taprio.entries, taprio->num_entries), + GFP_KERNEL); + if (!est_new) + return -ENOMEM; + + ret = am65_cpsw_setup_mqprio(ndev, &taprio->mqprio); + if (ret) + return ret; + + am65_cpsw_cp_taprio(taprio, &est_new->taprio); + + am65_cpsw_est_update_state(ndev); + ret = am65_cpsw_est_check_scheds(ndev, est_new); if (ret < 0) - return ret; + goto fail; tact = am65_cpsw_timer_act(ndev, est_new); if (tact == TACT_NEED_STOP) { - dev_err(&ndev->dev, - "Can't toggle estf timer, stop taprio first"); - return -EINVAL; + NL_SET_ERR_MSG_MOD(extack, + "Can't toggle estf timer, stop taprio first"); + ret = -EINVAL; + goto fail; } if (tact == TACT_PROG) @@ -476,62 +895,26 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_set_sched_list(ndev, est_new); am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf); - am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE); + am65_cpsw_est_set(ndev, 1); if (tact == TACT_PROG) { ret = am65_cpsw_timer_set(ndev, est_new); if (ret) { - dev_err(&ndev->dev, "Failed to set cycle time"); - return ret; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set cycle time"); + goto fail; } } - return ret; -} - -static void am65_cpsw_cp_taprio(struct tc_taprio_qopt_offload *from, - struct tc_taprio_qopt_offload *to) -{ - int i; - - *to = *from; - for (i = 0; i < from->num_entries; i++) - to->entries[i] = from->entries[i]; -} - -static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data) -{ - struct am65_cpsw_port *port = am65_ndev_to_port(ndev); - struct tc_taprio_qopt_offload *taprio = type_data; - struct am65_cpsw_est *est_new; - int ret = 0; - - if (taprio->cycle_time_extension) { - dev_err(&ndev->dev, "Failed to set cycle time extension"); - return -EOPNOTSUPP; - } - - est_new = devm_kzalloc(&ndev->dev, - struct_size(est_new, taprio.entries, taprio->num_entries), - GFP_KERNEL); - if (!est_new) - return -ENOMEM; - - am65_cpsw_cp_taprio(taprio, &est_new->taprio); - ret = am65_cpsw_configure_taprio(ndev, est_new); - if (!ret) { - if (taprio->cmd == TAPRIO_CMD_REPLACE) { - devm_kfree(&ndev->dev, port->qos.est_admin); + devm_kfree(&ndev->dev, port->qos.est_admin); + port->qos.est_admin = est_new; + am65_cpsw_iet_change_preemptible_tcs(port, taprio->mqprio.preemptible_tcs); - port->qos.est_admin = est_new; - } else { - devm_kfree(&ndev->dev, est_new); - am65_cpsw_purge_est(ndev); - } - } else { - devm_kfree(&ndev->dev, est_new); - } + return 0; +fail: + am65_cpsw_reset_tc_mqprio(ndev); + devm_kfree(&ndev->dev, est_new); return ret; } @@ -541,7 +924,6 @@ static void am65_cpsw_est_link_up(struct net_device *ndev, int link_speed) ktime_t cur_time; s64 delta; - port->qos.link_speed = link_speed; if (!am65_cpsw_port_est_enabled(port)) return; @@ -558,37 +940,26 @@ static void am65_cpsw_est_link_up(struct net_device *ndev, int link_speed) return; purge_est: - am65_cpsw_purge_est(ndev); + am65_cpsw_taprio_destroy(ndev); } static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data) { - struct am65_cpsw_port *port = am65_ndev_to_port(ndev); struct tc_taprio_qopt_offload *taprio = type_data; - struct am65_cpsw_common *common = port->common; - - if (taprio->cmd != TAPRIO_CMD_REPLACE && - taprio->cmd != TAPRIO_CMD_DESTROY) - return -EOPNOTSUPP; - - if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) - return -ENODEV; - - if (!netif_running(ndev)) { - dev_err(&ndev->dev, "interface is down, link speed unknown\n"); - return -ENETDOWN; - } - - if (common->pf_p0_rx_ptype_rrobin) { - dev_err(&ndev->dev, - "p0-rx-ptype-rrobin flag conflicts with taprio qdisc\n"); - return -EINVAL; + int err = 0; + + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + err = am65_cpsw_taprio_replace(ndev, taprio); + break; + case TAPRIO_CMD_DESTROY: + am65_cpsw_taprio_destroy(ndev); + break; + default: + err = -EOPNOTSUPP; } - if (port->qos.link_speed == SPEED_UNKNOWN) - return -ENOLINK; - - return am65_cpsw_set_taprio(ndev, type_data); + return err; } static int am65_cpsw_tc_query_caps(struct net_device *ndev, void *type_data) @@ -596,12 +967,17 @@ static int am65_cpsw_tc_query_caps(struct net_device *ndev, void *type_data) struct tc_query_caps_base *base = type_data; switch (base->type) { + case TC_SETUP_QDISC_MQPRIO: { + struct tc_mqprio_caps *caps = base->caps; + + caps->validate_queue_counts = true; + + return 0; + } + case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_caps *caps = base->caps; - if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) - return -EOPNOTSUPP; - caps->gate_mask_per_txq = true; return 0; @@ -787,55 +1163,6 @@ static int am65_cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_blo port, port, true); } -int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, - void *type_data) -{ - switch (type) { - case TC_QUERY_CAPS: - return am65_cpsw_tc_query_caps(ndev, type_data); - case TC_SETUP_QDISC_TAPRIO: - return am65_cpsw_setup_taprio(ndev, type_data); - case TC_SETUP_BLOCK: - return am65_cpsw_qos_setup_tc_block(ndev, type_data); - default: - return -EOPNOTSUPP; - } -} - -void am65_cpsw_qos_link_up(struct net_device *ndev, int link_speed) -{ - struct am65_cpsw_port *port = am65_ndev_to_port(ndev); - - if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) - return; - - am65_cpsw_est_link_up(ndev, link_speed); - port->qos.link_down_time = 0; -} - -void am65_cpsw_qos_link_down(struct net_device *ndev) -{ - struct am65_cpsw_port *port = am65_ndev_to_port(ndev); - - if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) - return; - - if (!port->qos.link_down_time) - port->qos.link_down_time = ktime_get(); - - port->qos.link_speed = SPEED_UNKNOWN; -} - -static u32 -am65_cpsw_qos_tx_rate_calc(u32 rate_mbps, unsigned long bus_freq) -{ - u32 ir; - - bus_freq /= 1000000; - ir = DIV_ROUND_UP(((u64)rate_mbps * 32768), bus_freq); - return ir; -} - static void am65_cpsw_qos_tx_p0_rate_apply(struct am65_cpsw_common *common, int tx_ch, u32 rate_mbps) @@ -937,3 +1264,44 @@ void am65_cpsw_qos_tx_p0_rate_init(struct am65_cpsw_common *common) host->port_base + AM65_CPSW_PN_REG_PRI_CIR(tx_ch)); } } + +int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_QUERY_CAPS: + return am65_cpsw_tc_query_caps(ndev, type_data); + case TC_SETUP_QDISC_TAPRIO: + return am65_cpsw_setup_taprio(ndev, type_data); + case TC_SETUP_QDISC_MQPRIO: + return am65_cpsw_setup_mqprio(ndev, type_data); + case TC_SETUP_BLOCK: + return am65_cpsw_qos_setup_tc_block(ndev, type_data); + default: + return -EOPNOTSUPP; + } +} + +void am65_cpsw_qos_link_up(struct net_device *ndev, int link_speed) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + port->qos.link_speed = link_speed; + am65_cpsw_tx_pn_shaper_apply(port); + am65_cpsw_iet_link_state_update(ndev); + + am65_cpsw_est_link_up(ndev, link_speed); + port->qos.link_down_time = 0; +} + +void am65_cpsw_qos_link_down(struct net_device *ndev) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + port->qos.link_speed = SPEED_UNKNOWN; + am65_cpsw_tx_pn_shaper_apply(port); + am65_cpsw_iet_link_state_update(ndev); + + if (!port->qos.link_down_time) + port->qos.link_down_time = ktime_get(); +} diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.h b/drivers/net/ethernet/ti/am65-cpsw-qos.h index 0cc2a3b3d7f9..b328e56c5b2b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.h +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.h @@ -9,6 +9,7 @@ #include <net/pkt_sched.h> struct am65_cpsw_common; +struct am65_cpsw_port; struct am65_cpsw_est { int buf; @@ -16,6 +17,18 @@ struct am65_cpsw_est { struct tc_taprio_qopt_offload taprio; }; +struct am65_cpsw_mqprio { + struct tc_mqprio_qopt_offload mqprio_hw; + u64 max_rate_total; + bool shaper_en; +}; + +struct am65_cpsw_iet { + u8 preemptible_tcs; + u32 original_max_blks; + int verify_time_ms; +}; + struct am65_cpsw_ale_ratelimit { unsigned long cookie; u64 rate_packet_ps; @@ -26,16 +39,189 @@ struct am65_cpsw_qos { struct am65_cpsw_est *est_oper; ktime_t link_down_time; int link_speed; + struct am65_cpsw_mqprio mqprio; + struct am65_cpsw_iet iet; struct am65_cpsw_ale_ratelimit ale_bc_ratelimit; struct am65_cpsw_ale_ratelimit ale_mc_ratelimit; }; +#define AM65_CPSW_REG_CTL 0x004 +#define AM65_CPSW_PN_REG_CTL 0x004 +#define AM65_CPSW_PN_REG_FIFO_STATUS 0x050 +#define AM65_CPSW_PN_REG_EST_CTL 0x060 +#define AM65_CPSW_PN_REG_PRI_CIR(pri) (0x140 + 4 * (pri)) +#define AM65_CPSW_P0_REG_PRI_EIR(pri) (0x160 + 4 * (pri)) + +#define AM65_CPSW_PN_REG_CTL 0x004 +#define AM65_CPSW_PN_REG_TX_PRI_MAP 0x018 +#define AM65_CPSW_PN_REG_RX_PRI_MAP 0x020 +#define AM65_CPSW_PN_REG_FIFO_STATUS 0x050 +#define AM65_CPSW_PN_REG_EST_CTL 0x060 +#define AM65_CPSW_PN_REG_PRI_CIR(pri) (0x140 + 4 * (pri)) +#define AM65_CPSW_PN_REG_PRI_EIR(pri) (0x160 + 4 * (pri)) + +/* AM65_CPSW_REG_CTL register fields */ +#define AM65_CPSW_CTL_EST_EN BIT(18) + +/* AM65_CPSW_PN_REG_CTL register fields */ +#define AM65_CPSW_PN_CTL_EST_PORT_EN BIT(17) + +/* AM65_CPSW_PN_REG_EST_CTL register fields */ +#define AM65_CPSW_PN_EST_ONEBUF BIT(0) +#define AM65_CPSW_PN_EST_BUFSEL BIT(1) +#define AM65_CPSW_PN_EST_TS_EN BIT(2) +#define AM65_CPSW_PN_EST_TS_FIRST BIT(3) +#define AM65_CPSW_PN_EST_ONEPRI BIT(4) +#define AM65_CPSW_PN_EST_TS_PRI_MSK GENMASK(7, 5) + +/* AM65_CPSW_PN_REG_FIFO_STATUS register fields */ +#define AM65_CPSW_PN_FST_TX_PRI_ACTIVE_MSK GENMASK(7, 0) +#define AM65_CPSW_PN_FST_TX_E_MAC_ALLOW_MSK GENMASK(15, 8) +#define AM65_CPSW_PN_FST_EST_CNT_ERR BIT(16) +#define AM65_CPSW_PN_FST_EST_ADD_ERR BIT(17) +#define AM65_CPSW_PN_FST_EST_BUFACT BIT(18) + +/* EST FETCH COMMAND RAM */ +#define AM65_CPSW_FETCH_RAM_CMD_NUM 0x80 +#define AM65_CPSW_FETCH_CNT_MSK GENMASK(21, 8) +#define AM65_CPSW_FETCH_CNT_MAX (AM65_CPSW_FETCH_CNT_MSK >> 8) +#define AM65_CPSW_FETCH_CNT_OFFSET 8 +#define AM65_CPSW_FETCH_ALLOW_MSK GENMASK(7, 0) +#define AM65_CPSW_FETCH_ALLOW_MAX AM65_CPSW_FETCH_ALLOW_MSK + +/* number of priority queues per port FIFO */ +#define AM65_CPSW_PN_FIFO_PRIO_NUM 8 + +#if IS_ENABLED(CONFIG_TI_AM65_CPSW_QOS) int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data); void am65_cpsw_qos_link_up(struct net_device *ndev, int link_speed); void am65_cpsw_qos_link_down(struct net_device *ndev); int am65_cpsw_qos_ndo_tx_p0_set_maxrate(struct net_device *ndev, int queue, u32 rate_mbps); void am65_cpsw_qos_tx_p0_rate_init(struct am65_cpsw_common *common); +void am65_cpsw_iet_commit_preemptible_tcs(struct am65_cpsw_port *port); +void am65_cpsw_iet_common_enable(struct am65_cpsw_common *common); +#else +static inline int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, + enum tc_setup_type type, + void *type_data) +{ + return -EOPNOTSUPP; +} + +static inline void am65_cpsw_qos_link_up(struct net_device *ndev, + int link_speed) +{ } + +static inline void am65_cpsw_qos_link_down(struct net_device *ndev) +{ } + +static inline int am65_cpsw_qos_ndo_tx_p0_set_maxrate(struct net_device *ndev, + int queue, + u32 rate_mbps) +{ + return 0; +} + +static inline void am65_cpsw_qos_tx_p0_rate_init(struct am65_cpsw_common *common) +{ } +static inline void am65_cpsw_iet_commit_preemptible_tcs(struct am65_cpsw_port *port) +{ } +static inline void am65_cpsw_iet_common_enable(struct am65_cpsw_common *common) +{ } +#endif + +#define AM65_CPSW_REG_CTL 0x004 +#define AM65_CPSW_PN_REG_CTL 0x004 +#define AM65_CPSW_PN_REG_MAX_BLKS 0x008 +#define AM65_CPSW_PN_REG_TX_PRI_MAP 0x018 +#define AM65_CPSW_PN_REG_RX_PRI_MAP 0x020 +#define AM65_CPSW_PN_REG_IET_CTRL 0x040 +#define AM65_CPSW_PN_REG_IET_STATUS 0x044 +#define AM65_CPSW_PN_REG_IET_VERIFY 0x048 +#define AM65_CPSW_PN_REG_FIFO_STATUS 0x050 +#define AM65_CPSW_PN_REG_EST_CTL 0x060 +#define AM65_CPSW_PN_REG_PRI_CIR(pri) (0x140 + 4 * (pri)) +#define AM65_CPSW_PN_REG_PRI_EIR(pri) (0x160 + 4 * (pri)) + +/* AM65_CPSW_REG_CTL register fields */ +#define AM65_CPSW_CTL_IET_EN BIT(17) +#define AM65_CPSW_CTL_EST_EN BIT(18) + +/* AM65_CPSW_PN_REG_CTL register fields */ +#define AM65_CPSW_PN_CTL_IET_PORT_EN BIT(16) +#define AM65_CPSW_PN_CTL_EST_PORT_EN BIT(17) + +/* AM65_CPSW_PN_REG_EST_CTL register fields */ +#define AM65_CPSW_PN_EST_ONEBUF BIT(0) +#define AM65_CPSW_PN_EST_BUFSEL BIT(1) +#define AM65_CPSW_PN_EST_TS_EN BIT(2) +#define AM65_CPSW_PN_EST_TS_FIRST BIT(3) +#define AM65_CPSW_PN_EST_ONEPRI BIT(4) +#define AM65_CPSW_PN_EST_TS_PRI_MSK GENMASK(7, 5) + +/* AM65_CPSW_PN_REG_IET_CTRL register fields */ +#define AM65_CPSW_PN_IET_MAC_PENABLE BIT(0) +#define AM65_CPSW_PN_IET_MAC_DISABLEVERIFY BIT(2) +#define AM65_CPSW_PN_IET_MAC_LINKFAIL BIT(3) +#define AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_MASK GENMASK(10, 8) +#define AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_OFFSET 8 +#define AM65_CPSW_PN_IET_MAC_PREMPT_MASK GENMASK(23, 16) +#define AM65_CPSW_PN_IET_MAC_PREMPT_OFFSET 16 + +#define AM65_CPSW_PN_IET_MAC_SET_ADDFRAGSIZE(n) (((n) << AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_OFFSET) & \ + AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_MASK) +#define AM65_CPSW_PN_IET_MAC_GET_ADDFRAGSIZE(n) (((n) & AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_MASK) >> \ + AM65_CPSW_PN_IET_MAC_MAC_ADDFRAGSIZE_OFFSET) +#define AM65_CPSW_PN_IET_MAC_SET_PREEMPT(n) (((n) << AM65_CPSW_PN_IET_MAC_PREMPT_OFFSET) & \ + AM65_CPSW_PN_IET_MAC_PREMPT_MASK) +#define AM65_CPSW_PN_IET_MAC_GET_PREEMPT(n) (((n) & AM65_CPSW_PN_IET_MAC_PREMPT_MASK) >> \ + AM65_CPSW_PN_IET_MAC_PREMPT_OFFSET) + +/* AM65_CPSW_PN_REG_IET_STATUS register fields */ +#define AM65_CPSW_PN_MAC_STATUS GENMASK(3, 0) +#define AM65_CPSW_PN_MAC_VERIFIED BIT(0) +#define AM65_CPSW_PN_MAC_VERIFY_FAIL BIT(1) +#define AM65_CPSW_PN_MAC_RESPOND_ERR BIT(2) +#define AM65_CPSW_PN_MAC_VERIFY_ERR BIT(3) + +/* AM65_CPSW_PN_REG_IET_VERIFY register fields */ +#define AM65_CPSW_PN_MAC_VERIFY_CNT_MASK GENMASK(23, 0) +#define AM65_CPSW_PN_MAC_GET_VERIFY_CNT(n) ((n) & AM65_CPSW_PN_MAC_VERIFY_CNT_MASK) +/* 10 msec converted to NSEC */ +#define AM65_CPSW_IET_VERIFY_CNT_MS (10) +#define AM65_CPSW_IET_VERIFY_CNT_NS (AM65_CPSW_IET_VERIFY_CNT_MS * \ + NSEC_PER_MSEC) + +/* AM65_CPSW_PN_REG_FIFO_STATUS register fields */ +#define AM65_CPSW_PN_FST_TX_PRI_ACTIVE_MSK GENMASK(7, 0) +#define AM65_CPSW_PN_FST_TX_E_MAC_ALLOW_MSK GENMASK(15, 8) +#define AM65_CPSW_PN_FST_EST_CNT_ERR BIT(16) +#define AM65_CPSW_PN_FST_EST_ADD_ERR BIT(17) +#define AM65_CPSW_PN_FST_EST_BUFACT BIT(18) + +/* EST FETCH COMMAND RAM */ +#define AM65_CPSW_FETCH_RAM_CMD_NUM 0x80 +#define AM65_CPSW_FETCH_CNT_MSK GENMASK(21, 8) +#define AM65_CPSW_FETCH_CNT_MAX (AM65_CPSW_FETCH_CNT_MSK >> 8) +#define AM65_CPSW_FETCH_CNT_OFFSET 8 +#define AM65_CPSW_FETCH_ALLOW_MSK GENMASK(7, 0) +#define AM65_CPSW_FETCH_ALLOW_MAX AM65_CPSW_FETCH_ALLOW_MSK + +/* AM65_CPSW_PN_REG_MAX_BLKS fields for IET and No IET cases */ +/* 7 blocks for pn_rx_max_blks, 13 for pn_tx_max_blks*/ +#define AM65_CPSW_PN_TX_RX_MAX_BLKS_IET 0xD07 + +/* Slave IET Stats. register offsets */ +#define AM65_CPSW_STATN_IET_RX_ASSEMBLY_ERROR 0x140 +#define AM65_CPSW_STATN_IET_RX_ASSEMBLY_OK 0x144 +#define AM65_CPSW_STATN_IET_RX_SMD_ERROR 0x148 +#define AM65_CPSW_STATN_IET_RX_FRAG 0x14c +#define AM65_CPSW_STATN_IET_TX_HOLD 0x150 +#define AM65_CPSW_STATN_IET_TX_FRAG 0x154 + +/* number of priority queues per port FIFO */ +#define AM65_CPSW_PN_FIFO_PRIO_NUM 8 #endif /* AM65_CPSW_QOS_H_ */ diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 9d535ae59626..d5b75af163d3 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -93,12 +93,13 @@ static void gelic_card_get_ether_port_status(struct gelic_card *card, * gelic_descr_get_status -- returns the status of a descriptor * @descr: descriptor to look at * - * returns the status as in the dmac_cmd_status field of the descriptor + * returns the status as in the hw_regs.dmac_cmd_status field of the descriptor */ static enum gelic_descr_dma_status gelic_descr_get_status(struct gelic_descr *descr) { - return be32_to_cpu(descr->dmac_cmd_status) & GELIC_DESCR_DMA_STAT_MASK; + return be32_to_cpu(descr->hw_regs.dmac_cmd_status) & + GELIC_DESCR_DMA_STAT_MASK; } static int gelic_card_set_link_mode(struct gelic_card *card, int mode) @@ -152,15 +153,15 @@ static void gelic_card_enable_rxdmac(struct gelic_card *card) if (gelic_descr_get_status(card->rx_chain.head) != GELIC_DESCR_DMA_CARDOWNED) { printk(KERN_ERR "%s: status=%x\n", __func__, - be32_to_cpu(card->rx_chain.head->dmac_cmd_status)); + be32_to_cpu(card->rx_chain.head->hw_regs.dmac_cmd_status)); printk(KERN_ERR "%s: nextphy=%x\n", __func__, - be32_to_cpu(card->rx_chain.head->next_descr_addr)); + be32_to_cpu(card->rx_chain.head->hw_regs.next_descr_addr)); printk(KERN_ERR "%s: head=%p\n", __func__, card->rx_chain.head); } #endif status = lv1_net_start_rx_dma(bus_id(card), dev_id(card), - card->rx_chain.head->bus_addr, 0); + card->rx_chain.head->link.cpu_addr, 0); if (status) dev_info(ctodev(card), "lv1_net_start_rx_dma failed, status=%d\n", status); @@ -195,8 +196,8 @@ static void gelic_card_disable_rxdmac(struct gelic_card *card) static void gelic_descr_set_status(struct gelic_descr *descr, enum gelic_descr_dma_status status) { - descr->dmac_cmd_status = cpu_to_be32(status | - (be32_to_cpu(descr->dmac_cmd_status) & + descr->hw_regs.dmac_cmd_status = cpu_to_be32(status | + (be32_to_cpu(descr->hw_regs.dmac_cmd_status) & ~GELIC_DESCR_DMA_STAT_MASK)); /* * dma_cmd_status field is used to indicate whether the descriptor @@ -224,13 +225,14 @@ static void gelic_card_reset_chain(struct gelic_card *card, for (descr = start_descr; start_descr != descr->next; descr++) { gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED); - descr->next_descr_addr = cpu_to_be32(descr->next->bus_addr); + descr->hw_regs.next_descr_addr + = cpu_to_be32(descr->next->link.cpu_addr); } chain->head = start_descr; chain->tail = (descr - 1); - (descr - 1)->next_descr_addr = 0; + (descr - 1)->hw_regs.next_descr_addr = 0; } void gelic_card_up(struct gelic_card *card) @@ -286,10 +288,12 @@ static void gelic_card_free_chain(struct gelic_card *card, { struct gelic_descr *descr; - for (descr = descr_in; descr && descr->bus_addr; descr = descr->next) { - dma_unmap_single(ctodev(card), descr->bus_addr, - GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL); - descr->bus_addr = 0; + for (descr = descr_in; descr && descr->link.cpu_addr; + descr = descr->next) { + dma_unmap_single(ctodev(card), descr->link.cpu_addr, + descr->link.size, DMA_BIDIRECTIONAL); + descr->link.cpu_addr = 0; + descr->link.size = 0; } } @@ -317,17 +321,21 @@ static int gelic_card_init_chain(struct gelic_card *card, /* set up the hardware pointers in each descriptor */ for (i = 0; i < no; i++, descr++) { - dma_addr_t cpu_addr; - gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE); - cpu_addr = dma_map_single(ctodev(card), descr, - GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL); + descr->link.size = sizeof(struct gelic_hw_regs); + descr->link.cpu_addr = dma_map_single(ctodev(card), descr, + descr->link.size, DMA_BIDIRECTIONAL); - if (dma_mapping_error(ctodev(card), cpu_addr)) - goto iommu_error; + if (dma_mapping_error(ctodev(card), descr->link.cpu_addr)) { + for (i--, descr--; 0 <= i; i--, descr--) { + dma_unmap_single(ctodev(card), + descr->link.cpu_addr, descr->link.size, + DMA_BIDIRECTIONAL); + } + return -ENOMEM; + } - descr->bus_addr = cpu_to_be32(cpu_addr); descr->next = descr + 1; descr->prev = descr - 1; } @@ -338,24 +346,17 @@ static int gelic_card_init_chain(struct gelic_card *card, /* chain bus addr of hw descriptor */ descr = start_descr; for (i = 0; i < no; i++, descr++) { - descr->next_descr_addr = cpu_to_be32(descr->next->bus_addr); + descr->hw_regs.next_descr_addr = + cpu_to_be32(descr->next->link.cpu_addr); } chain->head = start_descr; chain->tail = start_descr; /* do not chain last hw descriptor */ - (descr - 1)->next_descr_addr = 0; + (descr - 1)->hw_regs.next_descr_addr = 0; return 0; - -iommu_error: - for (i--, descr--; 0 <= i; i--, descr--) - if (descr->bus_addr) - dma_unmap_single(ctodev(card), descr->bus_addr, - GELIC_DESCR_SIZE, - DMA_BIDIRECTIONAL); - return -ENOMEM; } /** @@ -385,14 +386,16 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); if (!descr->skb) { - descr->buf_addr = 0; /* tell DMAC don't touch memory */ + descr->hw_regs.payload.dev_addr = 0; /* tell DMAC don't touch memory */ return -ENOMEM; } - descr->buf_size = cpu_to_be32(rx_skb_size); - descr->dmac_cmd_status = 0; - descr->result_size = 0; - descr->valid_size = 0; - descr->data_error = 0; + descr->hw_regs.dmac_cmd_status = 0; + descr->hw_regs.result_size = 0; + descr->hw_regs.valid_size = 0; + descr->hw_regs.data_error = 0; + descr->hw_regs.payload.dev_addr = 0; + descr->hw_regs.payload.size = 0; + descr->skb = NULL; offset = ((unsigned long)descr->skb->data) & (GELIC_NET_RXBUF_ALIGN - 1); @@ -401,7 +404,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, /* io-mmu-map the skb */ cpu_addr = dma_map_single(ctodev(card), descr->skb->data, GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE); - descr->buf_addr = cpu_to_be32(cpu_addr); + descr->hw_regs.payload.dev_addr = cpu_to_be32(cpu_addr); if (dma_mapping_error(ctodev(card), cpu_addr)) { dev_kfree_skb_any(descr->skb); descr->skb = NULL; @@ -409,10 +412,14 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, "%s:Could not iommu-map rx buffer\n", __func__); gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE); return -ENOMEM; - } else { - gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED); - return 0; } + + descr->hw_regs.payload.size = cpu_to_be32(GELIC_NET_MAX_FRAME); + descr->hw_regs.payload.dev_addr = cpu_to_be32(cpu_addr); + + gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED); + + return 0; } /** @@ -427,14 +434,15 @@ static void gelic_card_release_rx_chain(struct gelic_card *card) do { if (descr->skb) { dma_unmap_single(ctodev(card), - be32_to_cpu(descr->buf_addr), - descr->skb->len, - DMA_FROM_DEVICE); - descr->buf_addr = 0; + be32_to_cpu(descr->hw_regs.payload.dev_addr), + descr->skb->len, + DMA_FROM_DEVICE); + descr->hw_regs.payload.dev_addr = 0; + descr->hw_regs.payload.size = 0; dev_kfree_skb_any(descr->skb); descr->skb = NULL; gelic_descr_set_status(descr, - GELIC_DESCR_DMA_NOT_IN_USE); + GELIC_DESCR_DMA_NOT_IN_USE); } descr = descr->next; } while (descr != card->rx_chain.head); @@ -496,19 +504,20 @@ static void gelic_descr_release_tx(struct gelic_card *card, { struct sk_buff *skb = descr->skb; - BUG_ON(!(be32_to_cpu(descr->data_status) & GELIC_DESCR_TX_TAIL)); + BUG_ON(!(be32_to_cpu(descr->hw_regs.data_status) & GELIC_DESCR_TX_TAIL)); - dma_unmap_single(ctodev(card), be32_to_cpu(descr->buf_addr), skb->len, - DMA_TO_DEVICE); + dma_unmap_single(ctodev(card), + be32_to_cpu(descr->hw_regs.payload.dev_addr), skb->len, + DMA_TO_DEVICE); dev_kfree_skb_any(skb); - descr->buf_addr = 0; - descr->buf_size = 0; - descr->next_descr_addr = 0; - descr->result_size = 0; - descr->valid_size = 0; - descr->data_status = 0; - descr->data_error = 0; + descr->hw_regs.payload.dev_addr = 0; + descr->hw_regs.payload.size = 0; + descr->hw_regs.next_descr_addr = 0; + descr->hw_regs.result_size = 0; + descr->hw_regs.valid_size = 0; + descr->hw_regs.data_status = 0; + descr->hw_regs.data_error = 0; descr->skb = NULL; /* set descr status */ @@ -701,7 +710,7 @@ static void gelic_descr_set_tx_cmdstat(struct gelic_descr *descr, struct sk_buff *skb) { if (skb->ip_summed != CHECKSUM_PARTIAL) - descr->dmac_cmd_status = + descr->hw_regs.dmac_cmd_status = cpu_to_be32(GELIC_DESCR_DMA_CMD_NO_CHKSUM | GELIC_DESCR_TX_DMA_FRAME_TAIL); else { @@ -709,19 +718,19 @@ static void gelic_descr_set_tx_cmdstat(struct gelic_descr *descr, * if yes: tcp? udp? */ if (skb->protocol == htons(ETH_P_IP)) { if (ip_hdr(skb)->protocol == IPPROTO_TCP) - descr->dmac_cmd_status = + descr->hw_regs.dmac_cmd_status = cpu_to_be32(GELIC_DESCR_DMA_CMD_TCP_CHKSUM | GELIC_DESCR_TX_DMA_FRAME_TAIL); else if (ip_hdr(skb)->protocol == IPPROTO_UDP) - descr->dmac_cmd_status = + descr->hw_regs.dmac_cmd_status = cpu_to_be32(GELIC_DESCR_DMA_CMD_UDP_CHKSUM | GELIC_DESCR_TX_DMA_FRAME_TAIL); else /* * the stack should checksum non-tcp and non-udp * packets on his own: NETIF_F_IP_CSUM */ - descr->dmac_cmd_status = + descr->hw_regs.dmac_cmd_status = cpu_to_be32(GELIC_DESCR_DMA_CMD_NO_CHKSUM | GELIC_DESCR_TX_DMA_FRAME_TAIL); } @@ -789,11 +798,11 @@ static int gelic_descr_prepare_tx(struct gelic_card *card, return -ENOMEM; } - descr->buf_addr = cpu_to_be32(buf); - descr->buf_size = cpu_to_be32(skb->len); + descr->hw_regs.payload.dev_addr = cpu_to_be32(buf); + descr->hw_regs.payload.size = cpu_to_be32(skb->len); descr->skb = skb; - descr->data_status = 0; - descr->next_descr_addr = 0; /* terminate hw descr */ + descr->hw_regs.data_status = 0; + descr->hw_regs.next_descr_addr = 0; /* terminate hw descr */ gelic_descr_set_tx_cmdstat(descr, skb); /* bump free descriptor pointer */ @@ -818,7 +827,7 @@ static int gelic_card_kick_txdma(struct gelic_card *card, if (gelic_descr_get_status(descr) == GELIC_DESCR_DMA_CARDOWNED) { card->tx_dma_progress = 1; status = lv1_net_start_tx_dma(bus_id(card), dev_id(card), - descr->bus_addr, 0); + descr->link.cpu_addr, 0); if (status) { card->tx_dma_progress = 0; dev_info(ctodev(card), "lv1_net_start_txdma failed," \ @@ -871,7 +880,8 @@ netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) * link this prepared descriptor to previous one * to achieve high performance */ - descr->prev->next_descr_addr = cpu_to_be32(descr->bus_addr); + descr->prev->hw_regs.next_descr_addr = + cpu_to_be32(descr->link.cpu_addr); /* * as hardware descriptor is modified in the above lines, * ensure that the hardware sees it @@ -884,12 +894,12 @@ netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) */ netdev->stats.tx_dropped++; /* don't trigger BUG_ON() in gelic_descr_release_tx */ - descr->data_status = cpu_to_be32(GELIC_DESCR_TX_TAIL); + descr->hw_regs.data_status = cpu_to_be32(GELIC_DESCR_TX_TAIL); gelic_descr_release_tx(card, descr); /* reset head */ card->tx_chain.head = descr; /* reset hw termination */ - descr->prev->next_descr_addr = 0; + descr->prev->hw_regs.next_descr_addr = 0; dev_info(ctodev(card), "%s: kick failure\n", __func__); } @@ -914,21 +924,21 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, struct sk_buff *skb = descr->skb; u32 data_status, data_error; - data_status = be32_to_cpu(descr->data_status); - data_error = be32_to_cpu(descr->data_error); + data_status = be32_to_cpu(descr->hw_regs.data_status); + data_error = be32_to_cpu(descr->hw_regs.data_error); /* unmap skb buffer */ - dma_unmap_single(ctodev(card), be32_to_cpu(descr->buf_addr), - GELIC_NET_MAX_FRAME, - DMA_FROM_DEVICE); - - skb_put(skb, be32_to_cpu(descr->valid_size)? - be32_to_cpu(descr->valid_size) : - be32_to_cpu(descr->result_size)); - if (!descr->valid_size) + dma_unmap_single(ctodev(card), + be32_to_cpu(descr->hw_regs.payload.dev_addr), + be32_to_cpu(descr->hw_regs.payload.size), DMA_FROM_DEVICE); + + skb_put(skb, be32_to_cpu(descr->hw_regs.valid_size)? + be32_to_cpu(descr->hw_regs.valid_size) : + be32_to_cpu(descr->hw_regs.result_size)); + if (!descr->hw_regs.valid_size) dev_info(ctodev(card), "buffer full %x %x %x\n", - be32_to_cpu(descr->result_size), - be32_to_cpu(descr->buf_size), - be32_to_cpu(descr->dmac_cmd_status)); + be32_to_cpu(descr->hw_regs.result_size), + be32_to_cpu(descr->hw_regs.payload.size), + be32_to_cpu(descr->hw_regs.dmac_cmd_status)); descr->skb = NULL; /* @@ -1039,14 +1049,14 @@ refill: /* is the current descriptor terminated with next_descr == NULL? */ dmac_chain_ended = - be32_to_cpu(descr->dmac_cmd_status) & + be32_to_cpu(descr->hw_regs.dmac_cmd_status) & GELIC_DESCR_RX_DMA_CHAIN_END; /* * So that always DMAC can see the end * of the descriptor chain to avoid * from unwanted DMAC overrun. */ - descr->next_descr_addr = 0; + descr->hw_regs.next_descr_addr = 0; /* change the descriptor state: */ gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE); @@ -1063,7 +1073,8 @@ refill: /* * Set this descriptor the end of the chain. */ - descr->prev->next_descr_addr = cpu_to_be32(descr->bus_addr); + descr->prev->hw_regs.next_descr_addr = + cpu_to_be32(descr->link.cpu_addr); /* * If dmac chain was met, DMAC stopped. diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 0d98defb011e..f1f78de7705d 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -221,29 +221,35 @@ enum gelic_lv1_phy { GELIC_LV1_PHY_ETHERNET_0 = 0x0000000000000002L, }; -/* size of hardware part of gelic descriptor */ -#define GELIC_DESCR_SIZE (32) - enum gelic_port_type { GELIC_PORT_ETHERNET_0 = 0, GELIC_PORT_WIRELESS = 1, GELIC_PORT_MAX }; -struct gelic_descr { - /* as defined by the hardware */ - __be32 buf_addr; - __be32 buf_size; +/* As defined by the gelic hardware device. */ +struct gelic_hw_regs { + struct { + __be32 dev_addr; + __be32 size; + } __packed payload; __be32 next_descr_addr; __be32 dmac_cmd_status; __be32 result_size; __be32 valid_size; /* all zeroes for tx */ __be32 data_status; __be32 data_error; /* all zeroes for tx */ +} __packed; - /* used in the driver */ +struct gelic_chain_link { + dma_addr_t cpu_addr; + unsigned int size; +}; + +struct gelic_descr { + struct gelic_hw_regs hw_regs; + struct gelic_chain_link link; struct sk_buff *skb; - dma_addr_t bus_addr; struct gelic_descr *next; struct gelic_descr *prev; } __attribute__((aligned(32))); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index 6e9e5f01c152..cc3bec42ed8e 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -8,6 +8,7 @@ #include "wx_type.h" #include "wx_ethtool.h" #include "wx_hw.h" +#include "wx_lib.h" struct wx_stats { char stat_string[ETH_GSTRING_LEN]; @@ -185,3 +186,238 @@ void wx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) } } EXPORT_SYMBOL(wx_get_drvinfo); + +int wx_nway_reset(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + + return phylink_ethtool_nway_reset(wx->phylink); +} +EXPORT_SYMBOL(wx_nway_reset); + +int wx_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct wx *wx = netdev_priv(netdev); + + return phylink_ethtool_ksettings_get(wx->phylink, cmd); +} +EXPORT_SYMBOL(wx_get_link_ksettings); + +int wx_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct wx *wx = netdev_priv(netdev); + + return phylink_ethtool_ksettings_set(wx->phylink, cmd); +} +EXPORT_SYMBOL(wx_set_link_ksettings); + +void wx_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct wx *wx = netdev_priv(netdev); + + phylink_ethtool_get_pauseparam(wx->phylink, pause); +} +EXPORT_SYMBOL(wx_get_pauseparam); + +int wx_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct wx *wx = netdev_priv(netdev); + + return phylink_ethtool_set_pauseparam(wx->phylink, pause); +} +EXPORT_SYMBOL(wx_set_pauseparam); + +void wx_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct wx *wx = netdev_priv(netdev); + + ring->rx_max_pending = WX_MAX_RXD; + ring->tx_max_pending = WX_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = wx->rx_ring_count; + ring->tx_pending = wx->tx_ring_count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} +EXPORT_SYMBOL(wx_get_ringparam); + +int wx_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct wx *wx = netdev_priv(netdev); + + ec->tx_max_coalesced_frames_irq = wx->tx_work_limit; + /* only valid if in constant ITR mode */ + if (wx->rx_itr_setting <= 1) + ec->rx_coalesce_usecs = wx->rx_itr_setting; + else + ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2; + + /* if in mixed tx/rx queues per vector mode, report only rx settings */ + if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) + return 0; + + /* only valid if in constant ITR mode */ + if (wx->tx_itr_setting <= 1) + ec->tx_coalesce_usecs = wx->tx_itr_setting; + else + ec->tx_coalesce_usecs = wx->tx_itr_setting >> 2; + + return 0; +} +EXPORT_SYMBOL(wx_get_coalesce); + +int wx_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) +{ + struct wx *wx = netdev_priv(netdev); + u16 tx_itr_param, rx_itr_param; + struct wx_q_vector *q_vector; + u16 max_eitr; + int i; + + if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) { + /* reject Tx specific changes in case of mixed RxTx vectors */ + if (ec->tx_coalesce_usecs) + return -EOPNOTSUPP; + } + + if (ec->tx_max_coalesced_frames_irq) + wx->tx_work_limit = ec->tx_max_coalesced_frames_irq; + + if (wx->mac.type == wx_mac_sp) + max_eitr = WX_SP_MAX_EITR; + else + max_eitr = WX_EM_MAX_EITR; + + if ((ec->rx_coalesce_usecs > (max_eitr >> 2)) || + (ec->tx_coalesce_usecs > (max_eitr >> 2))) + return -EINVAL; + + if (ec->rx_coalesce_usecs > 1) + wx->rx_itr_setting = ec->rx_coalesce_usecs << 2; + else + wx->rx_itr_setting = ec->rx_coalesce_usecs; + + if (wx->rx_itr_setting == 1) + rx_itr_param = WX_20K_ITR; + else + rx_itr_param = wx->rx_itr_setting; + + if (ec->tx_coalesce_usecs > 1) + wx->tx_itr_setting = ec->tx_coalesce_usecs << 2; + else + wx->tx_itr_setting = ec->tx_coalesce_usecs; + + if (wx->tx_itr_setting == 1) { + if (wx->mac.type == wx_mac_sp) + tx_itr_param = WX_12K_ITR; + else + tx_itr_param = WX_20K_ITR; + } else { + tx_itr_param = wx->tx_itr_setting; + } + + /* mixed Rx/Tx */ + if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) + wx->tx_itr_setting = wx->rx_itr_setting; + + for (i = 0; i < wx->num_q_vectors; i++) { + q_vector = wx->q_vector[i]; + if (q_vector->tx.count && !q_vector->rx.count) + /* tx only */ + q_vector->itr = tx_itr_param; + else + /* rx only or mixed */ + q_vector->itr = rx_itr_param; + wx_write_eitr(q_vector); + } + + return 0; +} +EXPORT_SYMBOL(wx_set_coalesce); + +static unsigned int wx_max_channels(struct wx *wx) +{ + unsigned int max_combined; + + if (!wx->msix_q_entries) { + /* We only support one q_vector without MSI-X */ + max_combined = 1; + } else { + /* support up to max allowed queues with RSS */ + if (wx->mac.type == wx_mac_sp) + max_combined = 63; + else + max_combined = 8; + } + + return max_combined; +} + +void wx_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct wx *wx = netdev_priv(dev); + + /* report maximum channels */ + ch->max_combined = wx_max_channels(wx); + + /* report info for other vector */ + if (wx->msix_q_entries) { + ch->max_other = 1; + ch->other_count = 1; + } + + /* record RSS queues */ + ch->combined_count = wx->ring_feature[RING_F_RSS].indices; +} +EXPORT_SYMBOL(wx_get_channels); + +int wx_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + unsigned int count = ch->combined_count; + struct wx *wx = netdev_priv(dev); + + /* verify other_count has not changed */ + if (ch->other_count != 1) + return -EINVAL; + + /* verify the number of channels does not exceed hardware limits */ + if (count > wx_max_channels(wx)) + return -EINVAL; + + wx->ring_feature[RING_F_RSS].limit = count; + + return 0; +} +EXPORT_SYMBOL(wx_set_channels); + +u32 wx_get_msglevel(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + + return wx->msg_enable; +} +EXPORT_SYMBOL(wx_get_msglevel); + +void wx_set_msglevel(struct net_device *netdev, u32 data) +{ + struct wx *wx = netdev_priv(netdev); + + wx->msg_enable = data; +} +EXPORT_SYMBOL(wx_set_msglevel); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h index 16d1a09369a6..600c3b597d1a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h @@ -13,4 +13,31 @@ void wx_get_mac_stats(struct net_device *netdev, void wx_get_pause_stats(struct net_device *netdev, struct ethtool_pause_stats *stats); void wx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info); +int wx_nway_reset(struct net_device *netdev); +int wx_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd); +int wx_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd); +void wx_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause); +int wx_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause); +void wx_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack); +int wx_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); +int wx_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); +void wx_get_channels(struct net_device *dev, + struct ethtool_channels *ch); +int wx_set_channels(struct net_device *dev, + struct ethtool_channels *ch); +u32 wx_get_msglevel(struct net_device *netdev); +void wx_set_msglevel(struct net_device *netdev, u32 data); #endif /* _WX_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 533e912af089..1db754615cca 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -149,9 +149,9 @@ void wx_irq_disable(struct wx *wx) int vector; for (vector = 0; vector < wx->num_q_vectors; vector++) - synchronize_irq(wx->msix_entries[vector].vector); + synchronize_irq(wx->msix_q_entries[vector].vector); - synchronize_irq(wx->msix_entries[vector].vector); + synchronize_irq(wx->msix_entry->vector); } else { synchronize_irq(pdev->irq); } @@ -1158,6 +1158,81 @@ static void wx_set_rxpba(struct wx *wx) wr32(wx, WX_TDM_PB_THRE(0), txpbthresh); } +#define WX_ETH_FRAMING 20 + +/** + * wx_hpbthresh - calculate high water mark for flow control + * + * @wx: board private structure to calculate for + **/ +static int wx_hpbthresh(struct wx *wx) +{ + struct net_device *dev = wx->netdev; + int link, tc, kb, marker; + u32 dv_id, rx_pba; + + /* Calculate max LAN frame size */ + link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + WX_ETH_FRAMING; + tc = link; + + /* Calculate delay value for device */ + dv_id = WX_DV(link, tc); + + /* Delay value is calculated in bit times convert to KB */ + kb = WX_BT2KB(dv_id); + rx_pba = rd32(wx, WX_RDB_PB_SZ(0)) >> WX_RDB_PB_SZ_SHIFT; + + marker = rx_pba - kb; + + /* It is possible that the packet buffer is not large enough + * to provide required headroom. In this case throw an error + * to user and a do the best we can. + */ + if (marker < 0) { + dev_warn(&wx->pdev->dev, + "Packet Buffer can not provide enough headroom to support flow control. Decrease MTU or number of traffic classes\n"); + marker = tc + 1; + } + + return marker; +} + +/** + * wx_lpbthresh - calculate low water mark for flow control + * + * @wx: board private structure to calculate for + **/ +static int wx_lpbthresh(struct wx *wx) +{ + struct net_device *dev = wx->netdev; + u32 dv_id; + int tc; + + /* Calculate max LAN frame size */ + tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + + /* Calculate delay value for device */ + dv_id = WX_LOW_DV(tc); + + /* Delay value is calculated in bit times convert to KB */ + return WX_BT2KB(dv_id); +} + +/** + * wx_pbthresh_setup - calculate and setup high low water marks + * + * @wx: board private structure to calculate for + **/ +static void wx_pbthresh_setup(struct wx *wx) +{ + wx->fc.high_water = wx_hpbthresh(wx); + wx->fc.low_water = wx_lpbthresh(wx); + + /* Low water marks must not be larger than high water marks */ + if (wx->fc.low_water > wx->fc.high_water) + wx->fc.low_water = 0; +} + static void wx_configure_port(struct wx *wx) { u32 value, i; @@ -1522,6 +1597,72 @@ static void wx_restore_vlan(struct wx *wx) wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), vid); } +static void wx_store_reta(struct wx *wx) +{ + u8 *indir_tbl = wx->rss_indir_tbl; + u32 reta = 0; + u32 i; + + /* Fill out the redirection table as follows: + * - 8 bit wide entries containing 4 bit RSS index + */ + for (i = 0; i < WX_MAX_RETA_ENTRIES; i++) { + reta |= indir_tbl[i] << (i & 0x3) * 8; + if ((i & 3) == 3) { + wr32(wx, WX_RDB_RSSTBL(i >> 2), reta); + reta = 0; + } + } +} + +static void wx_setup_reta(struct wx *wx) +{ + u16 rss_i = wx->ring_feature[RING_F_RSS].indices; + u32 random_key_size = WX_RSS_KEY_SIZE / 4; + u32 i, j; + + /* Fill out hash function seeds */ + for (i = 0; i < random_key_size; i++) + wr32(wx, WX_RDB_RSSRK(i), wx->rss_key[i]); + + /* Fill out redirection table */ + memset(wx->rss_indir_tbl, 0, sizeof(wx->rss_indir_tbl)); + + for (i = 0, j = 0; i < WX_MAX_RETA_ENTRIES; i++, j++) { + if (j == rss_i) + j = 0; + + wx->rss_indir_tbl[i] = j; + } + + wx_store_reta(wx); +} + +static void wx_setup_mrqc(struct wx *wx) +{ + u32 rss_field = 0; + + /* Disable indicating checksum in descriptor, enables RSS hash */ + wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_PCSD, WX_PSR_CTL_PCSD); + + /* Perform hash on these packet types */ + rss_field = WX_RDB_RA_CTL_RSS_IPV4 | + WX_RDB_RA_CTL_RSS_IPV4_TCP | + WX_RDB_RA_CTL_RSS_IPV4_UDP | + WX_RDB_RA_CTL_RSS_IPV6 | + WX_RDB_RA_CTL_RSS_IPV6_TCP | + WX_RDB_RA_CTL_RSS_IPV6_UDP; + + netdev_rss_key_fill(wx->rss_key, sizeof(wx->rss_key)); + + wx_setup_reta(wx); + + if (wx->rss_enabled) + rss_field |= WX_RDB_RA_CTL_RSS_EN; + + wr32(wx, WX_RDB_RA_CTL, rss_field); +} + /** * wx_configure_rx - Configure Receive Unit after Reset * @wx: pointer to private structure @@ -1554,6 +1695,8 @@ void wx_configure_rx(struct wx *wx) wr32(wx, WX_PSR_CTL, psrctl); } + wx_setup_mrqc(wx); + /* set_rx_buffer_len must be called before ring initialization */ wx_set_rx_buffer_len(wx); @@ -1584,6 +1727,7 @@ static void wx_configure_isb(struct wx *wx) void wx_configure(struct wx *wx) { wx_set_rxpba(wx); + wx_pbthresh_setup(wx); wx_configure_port(wx); wx_set_rx_mode(wx->netdev); @@ -1750,6 +1894,28 @@ int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count) } EXPORT_SYMBOL(wx_get_pcie_msix_counts); +/** + * wx_init_rss_key - Initialize wx RSS key + * @wx: device handle + * + * Allocates and initializes the RSS key if it is not allocated. + **/ +static int wx_init_rss_key(struct wx *wx) +{ + u32 *rss_key; + + if (!wx->rss_key) { + rss_key = kzalloc(WX_RSS_KEY_SIZE, GFP_KERNEL); + if (unlikely(!rss_key)) + return -ENOMEM; + + netdev_rss_key_fill(rss_key, WX_RSS_KEY_SIZE); + wx->rss_key = rss_key; + } + + return 0; +} + int wx_sw_init(struct wx *wx) { struct pci_dev *pdev = wx->pdev; @@ -1777,14 +1943,23 @@ int wx_sw_init(struct wx *wx) wx->subsystem_device_id = swab16((u16)ssid); } + err = wx_init_rss_key(wx); + if (err < 0) { + wx_err(wx, "rss key allocation failed\n"); + return err; + } + wx->mac_table = kcalloc(wx->mac.num_rar_entries, sizeof(struct wx_mac_addr), GFP_KERNEL); if (!wx->mac_table) { wx_err(wx, "mac_table allocation failed\n"); + kfree(wx->rss_key); return -ENOMEM; } + wx->msix_in_use = false; + return 0; } EXPORT_SYMBOL(wx_sw_init); @@ -2003,6 +2178,102 @@ int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) } EXPORT_SYMBOL(wx_vlan_rx_kill_vid); +static void wx_enable_rx_drop(struct wx *wx, struct wx_ring *ring) +{ + u16 reg_idx = ring->reg_idx; + u32 srrctl; + + srrctl = rd32(wx, WX_PX_RR_CFG(reg_idx)); + srrctl |= WX_PX_RR_CFG_DROP_EN; + + wr32(wx, WX_PX_RR_CFG(reg_idx), srrctl); +} + +static void wx_disable_rx_drop(struct wx *wx, struct wx_ring *ring) +{ + u16 reg_idx = ring->reg_idx; + u32 srrctl; + + srrctl = rd32(wx, WX_PX_RR_CFG(reg_idx)); + srrctl &= ~WX_PX_RR_CFG_DROP_EN; + + wr32(wx, WX_PX_RR_CFG(reg_idx), srrctl); +} + +int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause) +{ + u16 pause_time = WX_DEFAULT_FCPAUSE; + u32 mflcn_reg, fccfg_reg, reg; + u32 fcrtl, fcrth; + int i; + + /* Low water mark of zero causes XOFF floods */ + if (tx_pause && wx->fc.high_water) { + if (!wx->fc.low_water || wx->fc.low_water >= wx->fc.high_water) { + wx_err(wx, "Invalid water mark configuration\n"); + return -EINVAL; + } + } + + /* Disable any previous flow control settings */ + mflcn_reg = rd32(wx, WX_MAC_RX_FLOW_CTRL); + mflcn_reg &= ~WX_MAC_RX_FLOW_CTRL_RFE; + + fccfg_reg = rd32(wx, WX_RDB_RFCC); + fccfg_reg &= ~WX_RDB_RFCC_RFCE_802_3X; + + if (rx_pause) + mflcn_reg |= WX_MAC_RX_FLOW_CTRL_RFE; + if (tx_pause) + fccfg_reg |= WX_RDB_RFCC_RFCE_802_3X; + + /* Set 802.3x based flow control settings. */ + wr32(wx, WX_MAC_RX_FLOW_CTRL, mflcn_reg); + wr32(wx, WX_RDB_RFCC, fccfg_reg); + + /* Set up and enable Rx high/low water mark thresholds, enable XON. */ + if (tx_pause && wx->fc.high_water) { + fcrtl = (wx->fc.low_water << 10) | WX_RDB_RFCL_XONE; + wr32(wx, WX_RDB_RFCL, fcrtl); + fcrth = (wx->fc.high_water << 10) | WX_RDB_RFCH_XOFFE; + } else { + wr32(wx, WX_RDB_RFCL, 0); + /* In order to prevent Tx hangs when the internal Tx + * switch is enabled we must set the high water mark + * to the Rx packet buffer size - 24KB. This allows + * the Tx switch to function even under heavy Rx + * workloads. + */ + fcrth = rd32(wx, WX_RDB_PB_SZ(0)) - 24576; + } + + wr32(wx, WX_RDB_RFCH, fcrth); + + /* Configure pause time */ + reg = pause_time * 0x00010001; + wr32(wx, WX_RDB_RFCV, reg); + + /* Configure flow control refresh threshold value */ + wr32(wx, WX_RDB_RFCRT, pause_time / 2); + + /* We should set the drop enable bit if: + * Number of Rx queues > 1 and flow control is disabled + * + * This allows us to avoid head of line blocking for security + * and performance reasons. + */ + if (wx->num_rx_queues > 1 && !tx_pause) { + for (i = 0; i < wx->num_rx_queues; i++) + wx_enable_rx_drop(wx, wx->rx_ring[i]); + } else { + for (i = 0; i < wx->num_rx_queues; i++) + wx_disable_rx_drop(wx, wx->rx_ring[i]); + } + + return 0; +} +EXPORT_SYMBOL(wx_fc_enable); + /** * wx_update_stats - Update the board statistics counters. * @wx: board private structure diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 12c20a7c364d..9e219fa717a2 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -41,6 +41,7 @@ int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count); int wx_sw_init(struct wx *wx); int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); +int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause); void wx_update_stats(struct wx *wx); void wx_clear_hw_cntrs(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index a5a50b5a8816..23355cc408fd 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -160,60 +160,6 @@ static __le32 wx_test_staterr(union wx_rx_desc *rx_desc, return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); } -static bool wx_can_reuse_rx_page(struct wx_rx_buffer *rx_buffer, - int rx_buffer_pgcnt) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) - return false; -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(pagecnt_bias == 1)) { - page_ref_add(page, USHRT_MAX - 1); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - -/** - * wx_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void wx_reuse_rx_page(struct wx_ring *rx_ring, - struct wx_rx_buffer *old_buff) -{ - u16 nta = rx_ring->next_to_alloc; - struct wx_rx_buffer *new_buff; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->page = old_buff->page; - new_buff->page_dma = old_buff->page_dma; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - static void wx_dma_sync_frag(struct wx_ring *rx_ring, struct wx_rx_buffer *rx_buffer) { @@ -270,8 +216,6 @@ static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring, size, DMA_FROM_DEVICE); skip_sync: - rx_buffer->pagecnt_bias--; - return rx_buffer; } @@ -280,19 +224,9 @@ static void wx_put_rx_buffer(struct wx_ring *rx_ring, struct sk_buff *skb, int rx_buffer_pgcnt) { - if (wx_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { - /* hand second half of page back to the ring */ - wx_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma) - /* the page has been released from the ring */ - WX_CB(skb)->page_released = true; - else - page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false); - - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } + if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma) + /* the page has been released from the ring */ + WX_CB(skb)->page_released = true; /* clear contents of rx_buffer */ rx_buffer->page = NULL; @@ -335,11 +269,12 @@ static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring, if (size <= WX_RXBUFFER_256) { memcpy(__skb_put(skb, size), page_addr, ALIGN(size, sizeof(long))); - rx_buffer->pagecnt_bias++; - + page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, true); return skb; } + skb_mark_for_recycle(skb); + if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)) WX_CB(skb)->dma = rx_buffer->dma; @@ -382,8 +317,6 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring, bi->page_dma = dma; bi->page = page; bi->page_offset = 0; - page_ref_add(page, USHRT_MAX - 1); - bi->pagecnt_bias = USHRT_MAX; return true; } @@ -723,7 +656,6 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; break; } @@ -1636,8 +1568,14 @@ EXPORT_SYMBOL(wx_napi_disable_all); **/ static void wx_set_rss_queues(struct wx *wx) { - wx->num_rx_queues = wx->mac.max_rx_queues; - wx->num_tx_queues = wx->mac.max_tx_queues; + struct wx_ring_feature *f; + + /* set mask for 16 queue limit of RSS */ + f = &wx->ring_feature[RING_F_RSS]; + f->indices = f->limit; + + wx->num_rx_queues = f->limit; + wx->num_tx_queues = f->limit; } static void wx_set_num_queues(struct wx *wx) @@ -1663,35 +1601,51 @@ static int wx_acquire_msix_vectors(struct wx *wx) struct irq_affinity affd = {0, }; int nvecs, i; - nvecs = min_t(int, num_online_cpus(), wx->mac.max_msix_vectors); + /* We start by asking for one vector per queue pair */ + nvecs = max(wx->num_rx_queues, wx->num_tx_queues); + nvecs = min_t(int, nvecs, num_online_cpus()); + nvecs = min_t(int, nvecs, wx->mac.max_msix_vectors); - wx->msix_entries = kcalloc(nvecs, - sizeof(struct msix_entry), - GFP_KERNEL); - if (!wx->msix_entries) + wx->msix_q_entries = kcalloc(nvecs, sizeof(struct msix_entry), + GFP_KERNEL); + if (!wx->msix_q_entries) return -ENOMEM; + /* One for non-queue interrupts */ + nvecs += 1; + + if (!wx->msix_in_use) { + wx->msix_entry = kcalloc(1, sizeof(struct msix_entry), + GFP_KERNEL); + if (!wx->msix_entry) { + kfree(wx->msix_q_entries); + wx->msix_q_entries = NULL; + return -ENOMEM; + } + } + nvecs = pci_alloc_irq_vectors_affinity(wx->pdev, nvecs, nvecs, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &affd); if (nvecs < 0) { wx_err(wx, "Failed to allocate MSI-X interrupts. Err: %d\n", nvecs); - kfree(wx->msix_entries); - wx->msix_entries = NULL; + kfree(wx->msix_q_entries); + wx->msix_q_entries = NULL; + kfree(wx->msix_entry); + wx->msix_entry = NULL; return nvecs; } + wx->msix_entry->entry = 0; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); + nvecs -= 1; for (i = 0; i < nvecs; i++) { - wx->msix_entries[i].entry = i; - wx->msix_entries[i].vector = pci_irq_vector(wx->pdev, i); + wx->msix_q_entries[i].entry = i; + wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1); } - /* one for msix_other */ - nvecs -= 1; wx->num_q_vectors = nvecs; - wx->num_rx_queues = nvecs; - wx->num_tx_queues = nvecs; return 0; } @@ -1713,9 +1667,11 @@ static int wx_set_interrupt_capability(struct wx *wx) if (ret == 0 || (ret == -ENOMEM)) return ret; - wx->num_rx_queues = 1; - wx->num_tx_queues = 1; - wx->num_q_vectors = 1; + /* Disable RSS */ + dev_warn(&wx->pdev->dev, "Disabling RSS support\n"); + wx->ring_feature[RING_F_RSS].limit = 1; + + wx_set_num_queues(wx); /* minmum one for queue, one for misc*/ nvecs = 1; @@ -1973,8 +1929,12 @@ void wx_reset_interrupt_capability(struct wx *wx) return; if (pdev->msix_enabled) { - kfree(wx->msix_entries); - wx->msix_entries = NULL; + kfree(wx->msix_q_entries); + wx->msix_q_entries = NULL; + if (!wx->msix_in_use) { + kfree(wx->msix_entry); + wx->msix_entry = NULL; + } } pci_free_irq_vectors(wx->pdev); } @@ -2046,7 +2006,7 @@ void wx_free_irq(struct wx *wx) for (vector = 0; vector < wx->num_q_vectors; vector++) { struct wx_q_vector *q_vector = wx->q_vector[vector]; - struct msix_entry *entry = &wx->msix_entries[vector]; + struct msix_entry *entry = &wx->msix_q_entries[vector]; /* free only the irqs that were actually requested */ if (!q_vector->rx.ring && !q_vector->tx.ring) @@ -2056,7 +2016,7 @@ void wx_free_irq(struct wx *wx) } if (wx->mac.type == wx_mac_em) - free_irq(wx->msix_entries[vector].vector, wx); + free_irq(wx->msix_entry->vector, wx); } EXPORT_SYMBOL(wx_free_irq); @@ -2133,6 +2093,7 @@ static void wx_set_ivar(struct wx *wx, s8 direction, wr32(wx, WX_PX_MISC_IVAR, ivar); } else { /* tx or rx causes */ + msix_vector += 1; /* offset for queue vectors */ msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = ((16 * (queue & 1)) + (8 * direction)); ivar = rd32(wx, WX_PX_IVAR(queue >> 1)); @@ -2150,7 +2111,7 @@ static void wx_set_ivar(struct wx *wx, s8 direction, * when it needs to update EITR registers at runtime. Hardware * specific quirks/differences are taken care of here. */ -static void wx_write_eitr(struct wx_q_vector *q_vector) +void wx_write_eitr(struct wx_q_vector *q_vector) { struct wx *wx = q_vector->wx; int v_idx = q_vector->v_idx; @@ -2163,7 +2124,7 @@ static void wx_write_eitr(struct wx_q_vector *q_vector) itr_reg |= WX_PX_ITR_CNT_WDIS; - wr32(wx, WX_PX_ITR(v_idx), itr_reg); + wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg); } /** @@ -2209,9 +2170,9 @@ void wx_configure_vectors(struct wx *wx) wx_write_eitr(q_vector); } - wx_set_ivar(wx, -1, 0, v_idx); + wx_set_ivar(wx, -1, 0, 0); if (pdev->msix_enabled) - wr32(wx, WX_PX_ITR(v_idx), 1950); + wr32(wx, WX_PX_ITR(0), 1950); } EXPORT_SYMBOL(wx_configure_vectors); @@ -2248,8 +2209,6 @@ static void wx_clean_rx_ring(struct wx_ring *rx_ring) /* free resources associated with mapping */ page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); i++; rx_buffer++; @@ -2726,11 +2685,14 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) netdev_features_t changed = netdev->features ^ features; struct wx *wx = netdev_priv(netdev); - if (changed & NETIF_F_RXHASH) + if (features & NETIF_F_RXHASH) { wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, WX_RDB_RA_CTL_RSS_EN); - else + wx->rss_enabled = true; + } else { wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, 0); + wx->rss_enabled = false; + } if (changed & (NETIF_F_HW_VLAN_CTAG_RX | @@ -2741,4 +2703,70 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) } EXPORT_SYMBOL(wx_set_features); +void wx_set_ring(struct wx *wx, u32 new_tx_count, + u32 new_rx_count, struct wx_ring *temp_ring) +{ + int i, err = 0; + + /* Setup new Tx resources and free the old Tx resources in that order. + * We can then assign the new resources to the rings via a memcpy. + * The advantage to this approach is that we are guaranteed to still + * have resources even in the case of an allocation failure. + */ + if (new_tx_count != wx->tx_ring_count) { + for (i = 0; i < wx->num_tx_queues; i++) { + memcpy(&temp_ring[i], wx->tx_ring[i], + sizeof(struct wx_ring)); + + temp_ring[i].count = new_tx_count; + err = wx_setup_tx_resources(&temp_ring[i]); + if (err) { + wx_err(wx, "setup new tx resources failed, keep using the old config\n"); + while (i) { + i--; + wx_free_tx_resources(&temp_ring[i]); + } + return; + } + } + + for (i = 0; i < wx->num_tx_queues; i++) { + wx_free_tx_resources(wx->tx_ring[i]); + + memcpy(wx->tx_ring[i], &temp_ring[i], + sizeof(struct wx_ring)); + } + + wx->tx_ring_count = new_tx_count; + } + + /* Repeat the process for the Rx rings if needed */ + if (new_rx_count != wx->rx_ring_count) { + for (i = 0; i < wx->num_rx_queues; i++) { + memcpy(&temp_ring[i], wx->rx_ring[i], + sizeof(struct wx_ring)); + + temp_ring[i].count = new_rx_count; + err = wx_setup_rx_resources(&temp_ring[i]); + if (err) { + wx_err(wx, "setup new rx resources failed, keep using the old config\n"); + while (i) { + i--; + wx_free_rx_resources(&temp_ring[i]); + } + return; + } + } + + for (i = 0; i < wx->num_rx_queues; i++) { + wx_free_rx_resources(wx->rx_ring[i]); + memcpy(wx->rx_ring[i], &temp_ring[i], + sizeof(struct wx_ring)); + } + + wx->rx_ring_count = new_rx_count; + } +} +EXPORT_SYMBOL(wx_set_ring); + MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h index df1f4a5951f0..ec909e876720 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h @@ -21,6 +21,7 @@ void wx_free_irq(struct wx *wx); int wx_setup_isb_resources(struct wx *wx); void wx_free_isb_resources(struct wx *wx); u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx); +void wx_write_eitr(struct wx_q_vector *q_vector); void wx_configure_vectors(struct wx *wx); void wx_clean_all_rx_rings(struct wx *wx); void wx_clean_all_tx_rings(struct wx *wx); @@ -29,5 +30,7 @@ int wx_setup_resources(struct wx *wx); void wx_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats); int wx_set_features(struct net_device *netdev, netdev_features_t features); +void wx_set_ring(struct wx *wx, u32 new_tx_count, + u32 new_rx_count, struct wx_ring *temp_ring); #endif /* _NGBE_LIB_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 165e82de772e..b4dc4f341117 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -7,6 +7,7 @@ #include <linux/bitfield.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> +#include <linux/phylink.h> #include <net/ip.h> #define WX_NCSI_SUP 0x8000 @@ -130,6 +131,15 @@ #define WX_RDB_PFCMACDAH 0x19214 #define WX_RDB_LXOFFTXC 0x19218 #define WX_RDB_LXONTXC 0x1921C +/* Flow Control Registers */ +#define WX_RDB_RFCV 0x19200 +#define WX_RDB_RFCL 0x19220 +#define WX_RDB_RFCL_XONE BIT(31) +#define WX_RDB_RFCH 0x19260 +#define WX_RDB_RFCH_XOFFE BIT(31) +#define WX_RDB_RFCRT 0x192A0 +#define WX_RDB_RFCC 0x192A4 +#define WX_RDB_RFCC_RFCE_802_3X BIT(3) /* ring assignment */ #define WX_RDB_PL_CFG(_i) (0x19300 + ((_i) * 4)) #define WX_RDB_PL_CFG_L4HDR BIT(1) @@ -137,8 +147,16 @@ #define WX_RDB_PL_CFG_L2HDR BIT(3) #define WX_RDB_PL_CFG_TUN_TUNHDR BIT(4) #define WX_RDB_PL_CFG_TUN_OUTL2HDR BIT(5) +#define WX_RDB_RSSTBL(_i) (0x19400 + ((_i) * 4)) +#define WX_RDB_RSSRK(_i) (0x19480 + ((_i) * 4)) #define WX_RDB_RA_CTL 0x194F4 #define WX_RDB_RA_CTL_RSS_EN BIT(2) /* RSS Enable */ +#define WX_RDB_RA_CTL_RSS_IPV4_TCP BIT(16) +#define WX_RDB_RA_CTL_RSS_IPV4 BIT(17) +#define WX_RDB_RA_CTL_RSS_IPV6 BIT(20) +#define WX_RDB_RA_CTL_RSS_IPV6_TCP BIT(21) +#define WX_RDB_RA_CTL_RSS_IPV4_UDP BIT(22) +#define WX_RDB_RA_CTL_RSS_IPV6_UDP BIT(23) /******************************* PSR Registers *******************************/ /* psr control */ @@ -305,6 +323,7 @@ enum WX_MSCA_CMD_value { #define WX_PX_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */ #define WX_7K_ITR 595 #define WX_12K_ITR 336 +#define WX_20K_ITR 200 #define WX_SP_MAX_EITR 0x00000FF8U #define WX_EM_MAX_EITR 0x00007FFCU @@ -330,6 +349,7 @@ enum WX_MSCA_CMD_value { #define WX_PX_MPRC(_i) (0x01020 + ((_i) * 0x40)) /* PX_RR_CFG bit definitions */ #define WX_PX_RR_CFG_VLAN BIT(31) +#define WX_PX_RR_CFG_DROP_EN BIT(30) #define WX_PX_RR_CFG_SPLIT_MODE BIT(26) #define WX_PX_RR_CFG_RR_THER_SHIFT 16 #define WX_PX_RR_CFG_RR_HDR_SZ GENMASK(15, 12) @@ -367,8 +387,46 @@ enum WX_MSCA_CMD_value { #define WX_MAC_STATE_MODIFIED 0x2 #define WX_MAC_STATE_IN_USE 0x4 +/* BitTimes (BT) conversion */ +#define WX_BT2KB(BT) (((BT) + (8 * 1024 - 1)) / (8 * 1024)) +#define WX_B2BT(BT) ((BT) * 8) + +/* Calculate Delay to respond to PFC */ +#define WX_PFC_D 672 +/* Calculate Cable Delay */ +#define WX_CABLE_DC 5556 /* Delay Copper */ +/* Calculate Delay incurred from higher layer */ +#define WX_HD 6144 + +/* Calculate Interface Delay */ +#define WX_PHY_D 12800 +#define WX_MAC_D 4096 +#define WX_XAUI_D (2 * 1024) +#define WX_ID (WX_MAC_D + WX_XAUI_D + WX_PHY_D) +/* Calculate PCI Bus delay for low thresholds */ +#define WX_PCI_DELAY 10000 + +/* Calculate delay value in bit times */ +#define WX_DV(_max_frame_link, _max_frame_tc) \ + ((36 * (WX_B2BT(_max_frame_link) + WX_PFC_D + \ + (2 * WX_CABLE_DC) + (2 * WX_ID) + WX_HD) / 25 + 1) + \ + 2 * WX_B2BT(_max_frame_tc)) + +/* Calculate low threshold delay values */ +#define WX_LOW_DV(_max_frame_tc) \ + (2 * (2 * WX_B2BT(_max_frame_tc) + (36 * WX_PCI_DELAY / 25) + 1)) + +/* flow control */ +#define WX_DEFAULT_FCPAUSE 0xFFFF + #define WX_MAX_RXD 8192 #define WX_MAX_TXD 8192 +#define WX_MIN_RXD 128 +#define WX_MIN_TXD 128 + +/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 8 #define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ #define VMDQ_P(p) p @@ -787,7 +845,6 @@ struct wx_rx_buffer { dma_addr_t page_dma; struct page *page; unsigned int page_offset; - u16 pagecnt_bias; }; struct wx_queue_stats { @@ -872,6 +929,19 @@ struct wx_q_vector { struct wx_ring ring[] ____cacheline_internodealigned_in_smp; }; +struct wx_ring_feature { + u16 limit; /* upper limit on feature indices */ + u16 indices; /* current value of indices */ + u16 mask; /* Mask used for feature to ring mapping */ + u16 offset; /* offset to start of feature */ +}; + +enum wx_ring_f_enum { + RING_F_NONE = 0, + RING_F_RSS, + RING_F_ARRAY_SIZE /* must be last in enum set */ +}; + enum wx_isb_idx { WX_ISB_HEADER, WX_ISB_MISC, @@ -880,6 +950,11 @@ enum wx_isb_idx { WX_ISB_MAX }; +struct wx_fc_info { + u32 high_water; /* Flow Ctrl High-water */ + u32 low_water; /* Flow Ctrl Low-water */ +}; + /* Statistics counters collected by the MAC */ struct wx_hw_stats { u64 gprc; @@ -920,6 +995,7 @@ struct wx { enum sp_media_type media_type; struct wx_eeprom_info eeprom; struct wx_addr_filter_info addr_ctrl; + struct wx_fc_info fc; struct wx_mac_addr *mac_table; u16 device_id; u16 vendor_id; @@ -940,6 +1016,8 @@ struct wx { int speed; int duplex; struct phy_device *phydev; + struct phylink *phylink; + struct phylink_config phylink_config; bool wol_hw_supported; bool ncsi_enabled; @@ -967,7 +1045,10 @@ struct wx { struct wx_q_vector *q_vector[64]; unsigned int queues_per_pool; - struct msix_entry *msix_entries; + struct msix_entry *msix_q_entries; + struct msix_entry *msix_entry; + bool msix_in_use; + struct wx_ring_feature ring_feature[RING_F_ARRAY_SIZE]; /* misc interrupt status block */ dma_addr_t isb_dma; @@ -975,8 +1056,9 @@ struct wx { u32 isb_tag[WX_ISB_MAX]; #define WX_MAX_RETA_ENTRIES 128 +#define WX_RSS_INDIR_TBL_MAX 64 u8 rss_indir_tbl[WX_MAX_RETA_ENTRIES]; - + bool rss_enabled; #define WX_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ u32 *rss_key; u32 wol; @@ -993,7 +1075,7 @@ struct wx { }; #define WX_INTR_ALL (~0ULL) -#define WX_INTR_Q(i) BIT(i) +#define WX_INTR_Q(i) BIT((i) + 1) /* register operations */ #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) @@ -1045,4 +1127,9 @@ rd64(struct wx *wx, u32 reg) #define wx_dbg(wx, fmt, arg...) \ dev_dbg(&(wx)->pdev->dev, fmt, ##arg) +static inline struct wx *phylink_to_wx(struct phylink_config *config) +{ + return container_of(config, struct wx, phylink_config); +} + #endif /* _WX_TYPE_H_ */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index afbdf6919071..786a652ae64f 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -7,7 +7,10 @@ #include "../libwx/wx_ethtool.h" #include "../libwx/wx_type.h" +#include "../libwx/wx_lib.h" +#include "../libwx/wx_hw.h" #include "ngbe_ethtool.h" +#include "ngbe_type.h" static void ngbe_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) @@ -41,12 +44,75 @@ static int ngbe_set_wol(struct net_device *netdev, return 0; } +static int ngbe_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) +{ + struct wx *wx = netdev_priv(netdev); + u32 new_rx_count, new_tx_count; + struct wx_ring *temp_ring; + int i; + + new_tx_count = clamp_t(u32, ring->tx_pending, WX_MIN_TXD, WX_MAX_TXD); + new_tx_count = ALIGN(new_tx_count, WX_REQ_TX_DESCRIPTOR_MULTIPLE); + + new_rx_count = clamp_t(u32, ring->rx_pending, WX_MIN_RXD, WX_MAX_RXD); + new_rx_count = ALIGN(new_rx_count, WX_REQ_RX_DESCRIPTOR_MULTIPLE); + + if (new_tx_count == wx->tx_ring_count && + new_rx_count == wx->rx_ring_count) + return 0; + + if (!netif_running(wx->netdev)) { + for (i = 0; i < wx->num_tx_queues; i++) + wx->tx_ring[i]->count = new_tx_count; + for (i = 0; i < wx->num_rx_queues; i++) + wx->rx_ring[i]->count = new_rx_count; + wx->tx_ring_count = new_tx_count; + wx->rx_ring_count = new_rx_count; + + return 0; + } + + /* allocate temporary buffer to store rings in */ + i = max_t(int, wx->num_tx_queues, wx->num_rx_queues); + temp_ring = kvmalloc_array(i, sizeof(struct wx_ring), GFP_KERNEL); + if (!temp_ring) + return -ENOMEM; + + ngbe_down(wx); + + wx_set_ring(wx, new_tx_count, new_rx_count, temp_ring); + kvfree(temp_ring); + + wx_configure(wx); + ngbe_up(wx); + + return 0; +} + +static int ngbe_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + int err; + + err = wx_set_channels(dev, ch); + if (err < 0) + return err; + + /* use setup TC to update any traffic class queue mapping */ + return ngbe_setup_tc(dev, netdev_get_num_tc(dev)); +} + static const struct ethtool_ops ngbe_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, .get_drvinfo = wx_get_drvinfo, .get_link = ethtool_op_get_link, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, - .nway_reset = phy_ethtool_nway_reset, + .get_link_ksettings = wx_get_link_ksettings, + .set_link_ksettings = wx_set_link_ksettings, + .nway_reset = wx_nway_reset, .get_wol = ngbe_get_wol, .set_wol = ngbe_set_wol, .get_sset_count = wx_get_sset_count, @@ -54,6 +120,16 @@ static const struct ethtool_ops ngbe_ethtool_ops = { .get_ethtool_stats = wx_get_ethtool_stats, .get_eth_mac_stats = wx_get_mac_stats, .get_pause_stats = wx_get_pause_stats, + .get_pauseparam = wx_get_pauseparam, + .set_pauseparam = wx_set_pauseparam, + .get_ringparam = wx_get_ringparam, + .set_ringparam = ngbe_set_ringparam, + .get_coalesce = wx_get_coalesce, + .set_coalesce = wx_set_coalesce, + .get_channels = wx_get_channels, + .set_channels = ngbe_set_channels, + .get_msglevel = wx_get_msglevel, + .set_msglevel = wx_set_msglevel, }; void ngbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index a5c623fd023e..fdd6b4f70b7a 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -80,28 +80,6 @@ static void ngbe_init_type_code(struct wx *wx) } /** - * ngbe_init_rss_key - Initialize wx RSS key - * @wx: device handle - * - * Allocates and initializes the RSS key if it is not allocated. - **/ -static inline int ngbe_init_rss_key(struct wx *wx) -{ - u32 *rss_key; - - if (!wx->rss_key) { - rss_key = kzalloc(WX_RSS_KEY_SIZE, GFP_KERNEL); - if (unlikely(!rss_key)) - return -ENOMEM; - - netdev_rss_key_fill(rss_key, WX_RSS_KEY_SIZE); - wx->rss_key = rss_key; - } - - return 0; -} - -/** * ngbe_sw_init - Initialize general software structures * @wx: board private structure to initialize **/ @@ -134,8 +112,9 @@ static int ngbe_sw_init(struct wx *wx) dev_err(&pdev->dev, "Do not support MSI-X\n"); wx->mac.max_msix_vectors = msix_count; - if (ngbe_init_rss_key(wx)) - return -ENOMEM; + wx->ring_feature[RING_F_RSS].limit = min_t(int, NGBE_MAX_RSS_INDICES, + num_online_cpus()); + wx->rss_enabled = true; /* enable itr by default in dynamic mode */ wx->rx_itr_setting = 1; @@ -175,7 +154,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues) if (queues) wx_intr_enable(wx, NGBE_INTR_ALL); else - wx_intr_enable(wx, NGBE_INTR_MISC(wx)); + wx_intr_enable(wx, NGBE_INTR_MISC); } /** @@ -241,7 +220,7 @@ static int ngbe_request_msix_irqs(struct wx *wx) for (vector = 0; vector < wx->num_q_vectors; vector++) { struct wx_q_vector *q_vector = wx->q_vector[vector]; - struct msix_entry *entry = &wx->msix_entries[vector]; + struct msix_entry *entry = &wx->msix_q_entries[vector]; if (q_vector->tx.ring && q_vector->rx.ring) snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -259,7 +238,7 @@ static int ngbe_request_msix_irqs(struct wx *wx) } } - err = request_irq(wx->msix_entries[vector].vector, + err = request_irq(wx->msix_entry->vector, ngbe_msix_other, 0, netdev->name, wx); if (err) { @@ -272,7 +251,7 @@ static int ngbe_request_msix_irqs(struct wx *wx) free_queue_irqs: while (vector) { vector--; - free_irq(wx->msix_entries[vector].vector, + free_irq(wx->msix_q_entries[vector].vector, wx->q_vector[vector]); } wx_reset_interrupt_capability(wx); @@ -334,15 +313,15 @@ static void ngbe_disable_device(struct wx *wx) wx_update_stats(wx); } -static void ngbe_down(struct wx *wx) +void ngbe_down(struct wx *wx) { - phy_stop(wx->phydev); + phylink_stop(wx->phylink); ngbe_disable_device(wx); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); } -static void ngbe_up(struct wx *wx) +void ngbe_up(struct wx *wx) { wx_configure_vectors(wx); @@ -359,7 +338,7 @@ static void ngbe_up(struct wx *wx) if (wx->gpio_ctrl) ngbe_sfp_modules_txrx_powerctl(wx, true); - phy_start(wx->phydev); + phylink_start(wx->phylink); } /** @@ -388,7 +367,7 @@ static int ngbe_open(struct net_device *netdev) if (err) goto err_free_resources; - err = ngbe_phy_connect(wx); + err = phylink_connect_phy(wx->phylink, wx->phydev); if (err) goto err_free_irq; @@ -404,7 +383,7 @@ static int ngbe_open(struct net_device *netdev) return 0; err_dis_phy: - phy_disconnect(wx->phydev); + phylink_disconnect_phy(wx->phylink); err_free_irq: wx_free_irq(wx); err_free_resources: @@ -430,7 +409,7 @@ static int ngbe_close(struct net_device *netdev) ngbe_down(wx); wx_free_irq(wx); wx_free_resources(wx); - phy_disconnect(wx->phydev); + phylink_disconnect_phy(wx->phylink); wx_control_hw(wx, false); return 0; @@ -480,6 +459,39 @@ static void ngbe_shutdown(struct pci_dev *pdev) } } +/** + * ngbe_setup_tc - routine to configure net_device for multiple traffic + * classes. + * + * @dev: net device to configure + * @tc: number of traffic classes to enable + */ +int ngbe_setup_tc(struct net_device *dev, u8 tc) +{ + struct wx *wx = netdev_priv(dev); + + /* Hardware has to reinitialize queues and interrupts to + * match packet buffer alignment. Unfortunately, the + * hardware is not flexible enough to do this dynamically. + */ + if (netif_running(dev)) + ngbe_close(dev); + + wx_clear_interrupt_scheme(wx); + + if (tc) + netdev_set_num_tc(dev, tc); + else + netdev_reset_tc(dev); + + wx_init_interrupt_scheme(wx); + + if (netif_running(dev)) + ngbe_open(dev); + + return 0; +} + static const struct net_device_ops ngbe_netdev_ops = { .ndo_open = ngbe_open, .ndo_stop = ngbe_close, @@ -681,6 +693,7 @@ static int ngbe_probe(struct pci_dev *pdev, return 0; err_register: + phylink_destroy(wx->phylink); wx_control_hw(wx, false); err_clear_interrupt_scheme: wx_clear_interrupt_scheme(wx); @@ -710,9 +723,11 @@ static void ngbe_remove(struct pci_dev *pdev) netdev = wx->netdev; unregister_netdev(netdev); + phylink_destroy(wx->phylink); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); + kfree(wx->rss_key); kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index 6302ecca71bb..ec54b18c5fe7 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -56,22 +56,28 @@ static int ngbe_phy_write_reg_c22(struct mii_bus *bus, int phy_addr, return ret; } -static void ngbe_handle_link_change(struct net_device *dev) +static void ngbe_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) { - struct wx *wx = netdev_priv(dev); - struct phy_device *phydev; +} + +static void ngbe_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ +} + +static void ngbe_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct wx *wx = phylink_to_wx(config); u32 lan_speed, reg; - phydev = wx->phydev; - if (!(wx->link != phydev->link || - wx->speed != phydev->speed || - wx->duplex != phydev->duplex)) - return; + wx_fc_enable(wx, tx_pause, rx_pause); - wx->link = phydev->link; - wx->speed = phydev->speed; - wx->duplex = phydev->duplex; - switch (phydev->speed) { + switch (speed) { case SPEED_10: lan_speed = 0; break; @@ -83,54 +89,51 @@ static void ngbe_handle_link_change(struct net_device *dev) lan_speed = 2; break; } + wr32m(wx, NGBE_CFG_LAN_SPEED, 0x3, lan_speed); - if (phydev->link) { - reg = rd32(wx, WX_MAC_TX_CFG); - reg &= ~WX_MAC_TX_CFG_SPEED_MASK; - reg |= WX_MAC_TX_CFG_SPEED_1G | WX_MAC_TX_CFG_TE; - wr32(wx, WX_MAC_TX_CFG, reg); - /* Re configure MAC RX */ - reg = rd32(wx, WX_MAC_RX_CFG); - wr32(wx, WX_MAC_RX_CFG, reg); - wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); - reg = rd32(wx, WX_MAC_WDG_TIMEOUT); - wr32(wx, WX_MAC_WDG_TIMEOUT, reg); - } - phy_print_status(phydev); + reg = rd32(wx, WX_MAC_TX_CFG); + reg &= ~WX_MAC_TX_CFG_SPEED_MASK; + reg |= WX_MAC_TX_CFG_SPEED_1G | WX_MAC_TX_CFG_TE; + wr32(wx, WX_MAC_TX_CFG, reg); + + /* Re configure MAC Rx */ + reg = rd32(wx, WX_MAC_RX_CFG); + wr32(wx, WX_MAC_RX_CFG, reg); + wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); + reg = rd32(wx, WX_MAC_WDG_TIMEOUT); + wr32(wx, WX_MAC_WDG_TIMEOUT, reg); } -int ngbe_phy_connect(struct wx *wx) +static const struct phylink_mac_ops ngbe_mac_ops = { + .mac_config = ngbe_mac_config, + .mac_link_down = ngbe_mac_link_down, + .mac_link_up = ngbe_mac_link_up, +}; + +static int ngbe_phylink_init(struct wx *wx) { - int ret; + struct phylink_config *config; + phy_interface_t phy_mode; + struct phylink *phylink; - ret = phy_connect_direct(wx->netdev, - wx->phydev, - ngbe_handle_link_change, - PHY_INTERFACE_MODE_RGMII_ID); - if (ret) { - wx_err(wx, "PHY connect failed.\n"); - return ret; - } + config = &wx->phylink_config; + config->dev = &wx->netdev->dev; + config->type = PHYLINK_NETDEV; + config->mac_capabilities = MAC_1000FD | MAC_100FD | MAC_10FD | + MAC_SYM_PAUSE | MAC_ASYM_PAUSE; + config->mac_managed_pm = true; - return 0; -} + phy_mode = PHY_INTERFACE_MODE_RGMII_ID; + __set_bit(PHY_INTERFACE_MODE_RGMII_ID, config->supported_interfaces); -static void ngbe_phy_fixup(struct wx *wx) -{ - struct phy_device *phydev = wx->phydev; - struct ethtool_eee eee; - - phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); - phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); - phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); - - phydev->mac_managed_pm = true; - if (wx->mac_type != em_mac_type_mdi) - return; - /* disable EEE, internal phy does not support eee */ - memset(&eee, 0, sizeof(eee)); - phy_ethtool_set_eee(phydev, &eee); + phylink = phylink_create(config, NULL, phy_mode, &ngbe_mac_ops); + if (IS_ERR(phylink)) + return PTR_ERR(phylink); + + wx->phylink = phylink; + + return 0; } int ngbe_mdio_init(struct wx *wx) @@ -165,11 +168,16 @@ int ngbe_mdio_init(struct wx *wx) return -ENODEV; phy_attached_info(wx->phydev); - ngbe_phy_fixup(wx); wx->link = 0; wx->speed = 0; wx->duplex = 0; + ret = ngbe_phylink_init(wx); + if (ret) { + wx_err(wx, "failed to init phylink: %d\n", ret); + return ret; + } + return 0; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.h index 0a6400dd89c4..f610b771888a 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.h @@ -7,6 +7,5 @@ #ifndef _NGBE_MDIO_H_ #define _NGBE_MDIO_H_ -int ngbe_phy_connect(struct wx *wx); int ngbe_mdio_init(struct wx *wx); #endif /* _NGBE_MDIO_H_ */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index ff754d69bdf6..f48ed7fc1805 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -80,7 +80,7 @@ NGBE_PX_MISC_IEN_GPIO) #define NGBE_INTR_ALL 0x1FF -#define NGBE_INTR_MISC(A) BIT((A)->num_q_vectors) +#define NGBE_INTR_MISC BIT(0) #define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4)) #define NGBE_CFG_LAN_SPEED 0x14440 @@ -105,6 +105,7 @@ #define NGBE_FW_CMD_ST_FAIL 0x70657376 #define NGBE_MAX_FDIR_INDICES 7 +#define NGBE_MAX_RSS_INDICES 8 #define NGBE_MAX_RX_QUEUES (NGBE_MAX_FDIR_INDICES + 1) #define NGBE_MAX_TX_QUEUES (NGBE_MAX_FDIR_INDICES + 1) @@ -130,4 +131,8 @@ extern char ngbe_driver_name[]; +void ngbe_down(struct wx *wx); +void ngbe_up(struct wx *wx); +int ngbe_setup_tc(struct net_device *dev, u8 tc); + #endif /* _NGBE_TYPE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index 3f336a088e43..db675512ce4d 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -7,43 +7,93 @@ #include "../libwx/wx_ethtool.h" #include "../libwx/wx_type.h" +#include "../libwx/wx_lib.h" #include "txgbe_type.h" #include "txgbe_ethtool.h" -static int txgbe_nway_reset(struct net_device *netdev) +static int txgbe_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { - struct txgbe *txgbe = netdev_to_txgbe(netdev); + struct wx *wx = netdev_priv(netdev); + u32 new_rx_count, new_tx_count; + struct wx_ring *temp_ring; + int i; - return phylink_ethtool_nway_reset(txgbe->phylink); -} + new_tx_count = clamp_t(u32, ring->tx_pending, WX_MIN_TXD, WX_MAX_TXD); + new_tx_count = ALIGN(new_tx_count, WX_REQ_TX_DESCRIPTOR_MULTIPLE); -static int txgbe_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct txgbe *txgbe = netdev_to_txgbe(netdev); + new_rx_count = clamp_t(u32, ring->rx_pending, WX_MIN_RXD, WX_MAX_RXD); + new_rx_count = ALIGN(new_rx_count, WX_REQ_RX_DESCRIPTOR_MULTIPLE); + + if (new_tx_count == wx->tx_ring_count && + new_rx_count == wx->rx_ring_count) + return 0; + + if (!netif_running(wx->netdev)) { + for (i = 0; i < wx->num_tx_queues; i++) + wx->tx_ring[i]->count = new_tx_count; + for (i = 0; i < wx->num_rx_queues; i++) + wx->rx_ring[i]->count = new_rx_count; + wx->tx_ring_count = new_tx_count; + wx->rx_ring_count = new_rx_count; + + return 0; + } - return phylink_ethtool_ksettings_get(txgbe->phylink, cmd); + /* allocate temporary buffer to store rings in */ + i = max_t(int, wx->num_tx_queues, wx->num_rx_queues); + temp_ring = kvmalloc_array(i, sizeof(struct wx_ring), GFP_KERNEL); + if (!temp_ring) + return -ENOMEM; + + txgbe_down(wx); + + wx_set_ring(wx, new_tx_count, new_rx_count, temp_ring); + kvfree(temp_ring); + + txgbe_up(wx); + + return 0; } -static int txgbe_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) +static int txgbe_set_channels(struct net_device *dev, + struct ethtool_channels *ch) { - struct txgbe *txgbe = netdev_to_txgbe(netdev); + int err; + + err = wx_set_channels(dev, ch); + if (err < 0) + return err; - return phylink_ethtool_ksettings_set(txgbe->phylink, cmd); + /* use setup TC to update any traffic class queue mapping */ + return txgbe_setup_tc(dev, netdev_get_num_tc(dev)); } static const struct ethtool_ops txgbe_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, .get_drvinfo = wx_get_drvinfo, - .nway_reset = txgbe_nway_reset, + .nway_reset = wx_nway_reset, .get_link = ethtool_op_get_link, - .get_link_ksettings = txgbe_get_link_ksettings, - .set_link_ksettings = txgbe_set_link_ksettings, + .get_link_ksettings = wx_get_link_ksettings, + .set_link_ksettings = wx_set_link_ksettings, .get_sset_count = wx_get_sset_count, .get_strings = wx_get_strings, .get_ethtool_stats = wx_get_ethtool_stats, .get_eth_mac_stats = wx_get_mac_stats, .get_pause_stats = wx_get_pause_stats, + .get_pauseparam = wx_get_pauseparam, + .set_pauseparam = wx_set_pauseparam, + .get_ringparam = wx_get_ringparam, + .set_ringparam = txgbe_set_ringparam, + .get_coalesce = wx_get_coalesce, + .set_coalesce = wx_set_coalesce, + .get_channels = wx_get_channels, + .set_channels = txgbe_set_channels, + .get_msglevel = wx_get_msglevel, + .set_msglevel = wx_set_msglevel, }; void txgbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index a78da2309db5..3b151c410a5c 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -86,7 +86,7 @@ static void txgbe_irq_enable(struct wx *wx, bool queues) wr32(wx, WX_PX_MISC_IEN, TXGBE_PX_MISC_IEN_MASK); /* unmask interrupt */ - wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); + wx_intr_enable(wx, TXGBE_INTR_MISC); if (queues) wx_intr_enable(wx, TXGBE_INTR_QALL(wx)); } @@ -145,7 +145,7 @@ static int txgbe_request_msix_irqs(struct wx *wx) for (vector = 0; vector < wx->num_q_vectors; vector++) { struct wx_q_vector *q_vector = wx->q_vector[vector]; - struct msix_entry *entry = &wx->msix_entries[vector]; + struct msix_entry *entry = &wx->msix_q_entries[vector]; if (q_vector->tx.ring && q_vector->rx.ring) snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -168,7 +168,7 @@ static int txgbe_request_msix_irqs(struct wx *wx) free_queue_irqs: while (vector) { vector--; - free_irq(wx->msix_entries[vector].vector, + free_irq(wx->msix_q_entries[vector].vector, wx->q_vector[vector]); } wx_reset_interrupt_capability(wx); @@ -206,7 +206,6 @@ static int txgbe_request_irq(struct wx *wx) static void txgbe_up_complete(struct wx *wx) { struct net_device *netdev = wx->netdev; - struct txgbe *txgbe; wx_control_hw(wx, true); wx_configure_vectors(wx); @@ -215,8 +214,7 @@ static void txgbe_up_complete(struct wx *wx) smp_mb__before_atomic(); wx_napi_enable_all(wx); - txgbe = netdev_to_txgbe(netdev); - phylink_start(txgbe->phylink); + phylink_start(wx->phylink); /* clear any pending interrupts, may auto mask */ rd32(wx, WX_PX_IC(0)); @@ -290,18 +288,22 @@ static void txgbe_disable_device(struct wx *wx) wx_update_stats(wx); } -static void txgbe_down(struct wx *wx) +void txgbe_down(struct wx *wx) { - struct txgbe *txgbe = netdev_to_txgbe(wx->netdev); - txgbe_disable_device(wx); txgbe_reset(wx); - phylink_stop(txgbe->phylink); + phylink_stop(wx->phylink); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); } +void txgbe_up(struct wx *wx) +{ + wx_configure(wx); + txgbe_up_complete(wx); +} + /** * txgbe_init_type_code - Initialize the shared code * @wx: pointer to hardware structure @@ -376,6 +378,10 @@ static int txgbe_sw_init(struct wx *wx) wx_err(wx, "Do not support MSI-X\n"); wx->mac.max_msix_vectors = msix_count; + wx->ring_feature[RING_F_RSS].limit = min_t(int, TXGBE_MAX_RSS_INDICES, + num_online_cpus()); + wx->rss_enabled = true; + /* enable itr by default in dynamic mode */ wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; @@ -502,6 +508,41 @@ static void txgbe_shutdown(struct pci_dev *pdev) } } +/** + * txgbe_setup_tc - routine to configure net_device for multiple traffic + * classes. + * + * @dev: net device to configure + * @tc: number of traffic classes to enable + */ +int txgbe_setup_tc(struct net_device *dev, u8 tc) +{ + struct wx *wx = netdev_priv(dev); + + /* Hardware has to reinitialize queues and interrupts to + * match packet buffer alignment. Unfortunately, the + * hardware is not flexible enough to do this dynamically. + */ + if (netif_running(dev)) + txgbe_close(dev); + else + txgbe_reset(wx); + + wx_clear_interrupt_scheme(wx); + + if (tc) + netdev_set_num_tc(dev, tc); + else + netdev_reset_tc(dev); + + wx_init_interrupt_scheme(wx); + + if (netif_running(dev)) + txgbe_open(dev); + + return 0; +} + static const struct net_device_ops txgbe_netdev_ops = { .ndo_open = txgbe_open, .ndo_stop = txgbe_close, @@ -776,6 +817,7 @@ static void txgbe_remove(struct pci_dev *pdev) pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); + kfree(wx->rss_key); kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index b6c06adb8656..1b84d495d14e 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -159,7 +159,8 @@ static int txgbe_mdio_pcs_init(struct txgbe *txgbe) static struct phylink_pcs *txgbe_phylink_mac_select(struct phylink_config *config, phy_interface_t interface) { - struct txgbe *txgbe = netdev_to_txgbe(to_net_dev(config->dev)); + struct wx *wx = phylink_to_wx(config); + struct txgbe *txgbe = wx->priv; if (interface == PHY_INTERFACE_MODE_10GBASER) return &txgbe->xpcs->pcs; @@ -175,7 +176,7 @@ static void txgbe_mac_config(struct phylink_config *config, unsigned int mode, static void txgbe_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { - struct wx *wx = netdev_priv(to_net_dev(config->dev)); + struct wx *wx = phylink_to_wx(config); wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0); } @@ -186,9 +187,11 @@ static void txgbe_mac_link_up(struct phylink_config *config, int speed, int duplex, bool tx_pause, bool rx_pause) { - struct wx *wx = netdev_priv(to_net_dev(config->dev)); + struct wx *wx = phylink_to_wx(config); u32 txcfg, wdg; + wx_fc_enable(wx, tx_pause, rx_pause); + txcfg = rd32(wx, WX_MAC_TX_CFG); txcfg &= ~WX_MAC_TX_CFG_SPEED_MASK; @@ -217,7 +220,7 @@ static void txgbe_mac_link_up(struct phylink_config *config, static int txgbe_mac_prepare(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { - struct wx *wx = netdev_priv(to_net_dev(config->dev)); + struct wx *wx = phylink_to_wx(config); wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0); wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_RE, 0); @@ -228,7 +231,7 @@ static int txgbe_mac_prepare(struct phylink_config *config, unsigned int mode, static int txgbe_mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { - struct wx *wx = netdev_priv(to_net_dev(config->dev)); + struct wx *wx = phylink_to_wx(config); txgbe_enable_sec_tx_path(wx); wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_RE, WX_MAC_RX_CFG_RE); @@ -253,10 +256,7 @@ static int txgbe_phylink_init(struct txgbe *txgbe) phy_interface_t phy_mode; struct phylink *phylink; - config = devm_kzalloc(&wx->pdev->dev, sizeof(*config), GFP_KERNEL); - if (!config) - return -ENOMEM; - + config = &wx->phylink_config; config->dev = &wx->netdev->dev; config->type = PHYLINK_NETDEV; config->mac_capabilities = MAC_10000FD | MAC_1000FD | MAC_100FD | @@ -287,7 +287,7 @@ static int txgbe_phylink_init(struct txgbe *txgbe) } } - txgbe->phylink = phylink; + wx->phylink = phylink; return 0; } @@ -483,11 +483,11 @@ static void txgbe_irq_handler(struct irq_desc *desc) TXGBE_PX_MISC_ETH_AN)) { u32 reg = rd32(wx, TXGBE_CFG_PORT_ST); - phylink_mac_change(txgbe->phylink, !!(reg & TXGBE_CFG_PORT_ST_LINK_UP)); + phylink_mac_change(wx->phylink, !!(reg & TXGBE_CFG_PORT_ST_LINK_UP)); } /* unmask interrupt */ - wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); + wx_intr_enable(wx, TXGBE_INTR_MISC); } static int txgbe_gpio_init(struct txgbe *txgbe) @@ -531,7 +531,12 @@ static int txgbe_gpio_init(struct txgbe *txgbe) sizeof(*girq->parents), GFP_KERNEL); if (!girq->parents) return -ENOMEM; - girq->parents[0] = wx->msix_entries[wx->num_q_vectors].vector; + + /* now only suuported on MSI-X interrupt */ + if (!wx->msix_entry) + return -EPERM; + + girq->parents[0] = wx->msix_entry->vector; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; @@ -701,6 +706,7 @@ static int txgbe_ext_phy_init(struct txgbe *txgbe) int txgbe_init_phy(struct txgbe *txgbe) { + struct wx *wx = txgbe->wx; int ret; if (txgbe->wx->media_type == sp_media_copper) @@ -708,46 +714,48 @@ int txgbe_init_phy(struct txgbe *txgbe) ret = txgbe_swnodes_register(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to register software nodes\n"); + wx_err(wx, "failed to register software nodes\n"); return ret; } ret = txgbe_mdio_pcs_init(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to init mdio pcs: %d\n", ret); + wx_err(wx, "failed to init mdio pcs: %d\n", ret); goto err_unregister_swnode; } ret = txgbe_phylink_init(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to init phylink\n"); + wx_err(wx, "failed to init phylink\n"); goto err_destroy_xpcs; } ret = txgbe_gpio_init(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to init gpio\n"); + wx_err(wx, "failed to init gpio\n"); goto err_destroy_phylink; } ret = txgbe_clock_register(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to register clock: %d\n", ret); + wx_err(wx, "failed to register clock: %d\n", ret); goto err_destroy_phylink; } ret = txgbe_i2c_register(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to init i2c interface: %d\n", ret); + wx_err(wx, "failed to init i2c interface: %d\n", ret); goto err_unregister_clk; } ret = txgbe_sfp_register(txgbe); if (ret) { - wx_err(txgbe->wx, "failed to register sfp\n"); + wx_err(wx, "failed to register sfp\n"); goto err_unregister_i2c; } + wx->msix_in_use = true; + return 0; err_unregister_i2c: @@ -756,7 +764,7 @@ err_unregister_clk: clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); err_destroy_phylink: - phylink_destroy(txgbe->phylink); + phylink_destroy(wx->phylink); err_destroy_xpcs: xpcs_destroy(txgbe->xpcs); err_unregister_swnode: @@ -768,8 +776,8 @@ err_unregister_swnode: void txgbe_remove_phy(struct txgbe *txgbe) { if (txgbe->wx->media_type == sp_media_copper) { - phylink_disconnect_phy(txgbe->phylink); - phylink_destroy(txgbe->phylink); + phylink_disconnect_phy(txgbe->wx->phylink); + phylink_destroy(txgbe->wx->phylink); return; } @@ -777,7 +785,8 @@ void txgbe_remove_phy(struct txgbe *txgbe) platform_device_unregister(txgbe->i2c_dev); clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); - phylink_destroy(txgbe->phylink); + phylink_destroy(txgbe->wx->phylink); xpcs_destroy(txgbe->xpcs); software_node_unregister_node_group(txgbe->nodes.group); + txgbe->wx->msix_in_use = false; } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 3ba9ce43f394..270a6fd9ad0b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -98,6 +98,7 @@ #define TXGBE_MAX_MSIX_VECTORS 64 #define TXGBE_MAX_FDIR_INDICES 63 +#define TXGBE_MAX_RSS_INDICES 63 #define TXGBE_MAX_RX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1) #define TXGBE_MAX_TX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1) @@ -122,19 +123,16 @@ #define TXGBE_DEFAULT_RX_WORK 128 #endif -#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors) -#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1) +#define TXGBE_INTR_MISC BIT(0) +#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1) #define TXGBE_MAX_EITR GENMASK(11, 3) extern char txgbe_driver_name[]; -static inline struct txgbe *netdev_to_txgbe(struct net_device *netdev) -{ - struct wx *wx = netdev_priv(netdev); - - return wx->priv; -} +void txgbe_down(struct wx *wx); +void txgbe_up(struct wx *wx); +int txgbe_setup_tc(struct net_device *dev, u8 tc); #define NODE_PROP(_NAME, _PROP) \ (const struct software_node) { \ @@ -175,7 +173,6 @@ struct txgbe { struct wx *wx; struct txgbe_nodes nodes; struct dw_xpcs *xpcs; - struct phylink *phylink; struct platform_device *sfp_dev; struct platform_device *i2c_dev; struct clk_lookup *clock; diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 523d13ee02bf..35e55f198e05 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -221,7 +221,7 @@ err_slave: return err; } -static int fakelb_remove(struct platform_device *pdev) +static void fakelb_remove(struct platform_device *pdev) { struct fakelb_phy *phy, *tmp; @@ -229,14 +229,13 @@ static int fakelb_remove(struct platform_device *pdev) list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); mutex_unlock(&fakelb_phys_lock); - return 0; } static struct platform_device *ieee802154fake_dev; static struct platform_driver ieee802154fake_driver = { .probe = fakelb_probe, - .remove = fakelb_remove, + .remove_new = fakelb_remove, .driver = { .name = "ieee802154fakelb", }, diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 31cba9aa7636..2c2483bbe780 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -1035,7 +1035,7 @@ err_slave: return err; } -static int hwsim_remove(struct platform_device *pdev) +static void hwsim_remove(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; @@ -1043,13 +1043,11 @@ static int hwsim_remove(struct platform_device *pdev) list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); - - return 0; } static struct platform_driver mac802154hwsim_driver = { .probe = hwsim_probe, - .remove = hwsim_remove, + .remove_new = hwsim_remove, .driver = { .name = "mac802154_hwsim", }, diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 9663050a852d..e34816638569 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -93,6 +93,8 @@ struct pcpu_secy_stats { * @secys: linked list of SecY's on the underlying device * @gro_cells: pointer to the Generic Receive Offload cell * @offload: status of offloading on the MACsec device + * @insert_tx_tag: when offloading, device requires to insert an + * additional tag */ struct macsec_dev { struct macsec_secy secy; @@ -102,6 +104,7 @@ struct macsec_dev { struct list_head secys; struct gro_cells gro_cells; enum macsec_offload offload; + bool insert_tx_tag; }; /** @@ -604,26 +607,11 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-EINVAL); } - if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM || - skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) { - struct sk_buff *nskb = skb_copy_expand(skb, - MACSEC_NEEDED_HEADROOM, - MACSEC_NEEDED_TAILROOM, - GFP_ATOMIC); - if (likely(nskb)) { - consume_skb(skb); - skb = nskb; - } else { - macsec_txsa_put(tx_sa); - kfree_skb(skb); - return ERR_PTR(-ENOMEM); - } - } else { - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) { - macsec_txsa_put(tx_sa); - return ERR_PTR(-ENOMEM); - } + ret = skb_ensure_writable_head_tail(skb, dev); + if (unlikely(ret < 0)) { + macsec_txsa_put(tx_sa); + kfree_skb(skb); + return ERR_PTR(ret); } unprotected_len = skb->len; @@ -2583,6 +2571,33 @@ static bool macsec_is_configured(struct macsec_dev *macsec) return false; } +static bool macsec_needs_tx_tag(struct macsec_dev *macsec, + const struct macsec_ops *ops) +{ + return macsec->offload == MACSEC_OFFLOAD_PHY && + ops->mdo_insert_tx_tag; +} + +static void macsec_set_head_tail_room(struct net_device *dev) +{ + struct macsec_dev *macsec = macsec_priv(dev); + struct net_device *real_dev = macsec->real_dev; + int needed_headroom, needed_tailroom; + const struct macsec_ops *ops; + + ops = macsec_get_ops(macsec, NULL); + if (ops) { + needed_headroom = ops->needed_headroom; + needed_tailroom = ops->needed_tailroom; + } else { + needed_headroom = MACSEC_NEEDED_HEADROOM; + needed_tailroom = MACSEC_NEEDED_TAILROOM; + } + + dev->needed_headroom = real_dev->needed_headroom + needed_headroom; + dev->needed_tailroom = real_dev->needed_tailroom + needed_tailroom; +} + static int macsec_update_offload(struct net_device *dev, enum macsec_offload offload) { enum macsec_offload prev_offload; @@ -2620,8 +2635,13 @@ static int macsec_update_offload(struct net_device *dev, enum macsec_offload off ctx.secy = &macsec->secy; ret = offload == MACSEC_OFFLOAD_OFF ? macsec_offload(ops->mdo_del_secy, &ctx) : macsec_offload(ops->mdo_add_secy, &ctx); - if (ret) + if (ret) { macsec->offload = prev_offload; + return ret; + } + + macsec_set_head_tail_room(dev); + macsec->insert_tx_tag = macsec_needs_tx_tag(macsec, ops); return ret; } @@ -3379,6 +3399,40 @@ static struct genl_family macsec_fam __ro_after_init = { .resv_start_op = MACSEC_CMD_UPD_OFFLOAD + 1, }; +static struct sk_buff *macsec_insert_tx_tag(struct sk_buff *skb, + struct net_device *dev) +{ + struct macsec_dev *macsec = macsec_priv(dev); + const struct macsec_ops *ops; + struct phy_device *phydev; + struct macsec_context ctx; + int skb_final_len; + int err; + + ops = macsec_get_ops(macsec, &ctx); + skb_final_len = skb->len - ETH_HLEN + ops->needed_headroom + + ops->needed_tailroom; + if (unlikely(skb_final_len > macsec->real_dev->mtu)) { + err = -EINVAL; + goto cleanup; + } + + phydev = macsec->real_dev->phydev; + + err = skb_ensure_writable_head_tail(skb, dev); + if (unlikely(err < 0)) + goto cleanup; + + err = ops->mdo_insert_tx_tag(phydev, skb); + if (unlikely(err)) + goto cleanup; + + return skb; +cleanup: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -3393,6 +3447,15 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, skb_dst_drop(skb); dst_hold(&md_dst->dst); skb_dst_set(skb, &md_dst->dst); + + if (macsec->insert_tx_tag) { + skb = macsec_insert_tx_tag(skb, dev); + if (IS_ERR(skb)) { + DEV_STATS_INC(dev, tx_dropped); + return NETDEV_TX_OK; + } + } + skb->dev = macsec->real_dev; return dev_queue_xmit(skb); } @@ -3454,10 +3517,7 @@ static int macsec_dev_init(struct net_device *dev) dev->features = real_dev->features & MACSEC_FEATURES; dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; - dev->needed_headroom = real_dev->needed_headroom + - MACSEC_NEEDED_HEADROOM; - dev->needed_tailroom = real_dev->needed_tailroom + - MACSEC_NEEDED_TAILROOM; + macsec_set_head_tail_room(dev); if (is_zero_ether_addr(dev->dev_addr)) eth_hw_addr_inherit(dev, real_dev); @@ -3604,21 +3664,19 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct sockaddr *addr = p; + u8 old_addr[ETH_ALEN]; int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (!(dev->flags & IFF_UP)) - goto out; - - err = dev_uc_add(real_dev, addr->sa_data); - if (err < 0) - return err; - - dev_uc_del(real_dev, dev->dev_addr); + if (dev->flags & IFF_UP) { + err = dev_uc_add(real_dev, addr->sa_data); + if (err < 0) + return err; + } -out: + ether_addr_copy(old_addr, dev->dev_addr); eth_hw_addr_set(dev, addr->sa_data); /* If h/w offloading is available, propagate to the device */ @@ -3627,13 +3685,29 @@ out: struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); - if (ops) { - ctx.secy = &macsec->secy; - macsec_offload(ops->mdo_upd_secy, &ctx); + if (!ops) { + err = -EOPNOTSUPP; + goto restore_old_addr; } + + ctx.secy = &macsec->secy; + err = macsec_offload(ops->mdo_upd_secy, &ctx); + if (err) + goto restore_old_addr; } + if (dev->flags & IFF_UP) + dev_uc_del(real_dev, old_addr); + return 0; + +restore_old_addr: + if (dev->flags & IFF_UP) + dev_uc_del(real_dev, addr->sa_data); + + eth_hw_addr_set(dev, old_addr); + + return err; } static int macsec_change_mtu(struct net_device *dev, int new_mtu) @@ -4126,6 +4200,9 @@ static int macsec_newlink(struct net *net, struct net_device *dev, err = macsec_offload(ops->mdo_add_secy, &ctx); if (err) goto del_dev; + + macsec->insert_tx_tag = + macsec_needs_tx_tag(macsec, ops); } } diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c index a750bd4c77a0..1ce7d67ba72e 100644 --- a/drivers/net/mdio/mdio-mux-bcm-iproc.c +++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c @@ -2,6 +2,7 @@ /* * Copyright 2016 Broadcom */ +#include <linux/align.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/device.h> @@ -11,6 +12,7 @@ #include <linux/of_mdio.h> #include <linux/phy.h> #include <linux/platform_device.h> +#include <linux/sizes.h> #define MDIO_RATE_ADJ_EXT_OFFSET 0x000 #define MDIO_RATE_ADJ_INT_OFFSET 0x004 @@ -220,12 +222,12 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev) md->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(md->base)) return PTR_ERR(md->base); - if (res->start & 0xfff) { + if (!IS_ALIGNED(res->start, SZ_4K)) { /* For backward compatibility in case the * base address is specified with an offset. */ dev_info(&pdev->dev, "fix base address in dt-blob\n"); - res->start &= ~0xfff; + res->start = ALIGN_DOWN(res->start, SZ_4K); res->end = res->start + MDIO_REG_ADDR_SPACE_SIZE - 1; } diff --git a/drivers/net/netdevsim/macsec.c b/drivers/net/netdevsim/macsec.c index 0d5f50430dd3..aa007b1e4b78 100644 --- a/drivers/net/netdevsim/macsec.c +++ b/drivers/net/netdevsim/macsec.c @@ -3,11 +3,6 @@ #include <net/macsec.h> #include "netdevsim.h" -static inline u64 sci_to_cpu(sci_t sci) -{ - return be64_to_cpu((__force __be64)sci); -} - static int nsim_macsec_find_secy(struct netdevsim *ns, sci_t sci) { int i; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 2e4667bf9ff5..9e2672800f0b 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -317,9 +317,10 @@ config NXP_CBTX_PHY config NXP_C45_TJA11XX_PHY tristate "NXP C45 TJA11XX PHYs" depends on PTP_1588_CLOCK_OPTIONAL + depends on MACSEC || !MACSEC help Enable support for NXP C45 TJA11XX PHYs. - Currently supports the TJA1103 and TJA1120 PHYs. + Currently supports the TJA1103, TJA1104 and TJA1120 PHYs. config NXP_TJA11XX_PHY tristate "NXP TJA11xx PHYs support" diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index e35ea69d9cb4..6097afd44392 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -84,7 +84,11 @@ obj-$(CONFIG_MICROSEMI_PHY) += mscc/ obj-$(CONFIG_MOTORCOMM_PHY) += motorcomm.o obj-$(CONFIG_NATIONAL_PHY) += national.o obj-$(CONFIG_NCN26000_PHY) += ncn26000.o -obj-$(CONFIG_NXP_C45_TJA11XX_PHY) += nxp-c45-tja11xx.o +nxp-c45-tja-objs += nxp-c45-tja11xx.o +ifdef CONFIG_MACSEC +nxp-c45-tja-objs += nxp-c45-tja11xx-macsec.o +endif +obj-$(CONFIG_NXP_C45_TJA11XX_PHY) += nxp-c45-tja.o obj-$(CONFIG_NXP_CBTX_PHY) += nxp-cbtx.o obj-$(CONFIG_NXP_TJA11XX_PHY) += nxp-tja11xx.o obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/aquantia/Kconfig b/drivers/net/phy/aquantia/Kconfig index a35de4b9b554..1a65678583cf 100644 --- a/drivers/net/phy/aquantia/Kconfig +++ b/drivers/net/phy/aquantia/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config AQUANTIA_PHY tristate "Aquantia PHYs" - select CRC_CCITT + select CRC_ITU_T help Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c index ff34d00d5a0e..0c9640ef153b 100644 --- a/drivers/net/phy/aquantia/aquantia_firmware.c +++ b/drivers/net/phy/aquantia/aquantia_firmware.c @@ -3,7 +3,7 @@ #include <linux/bitfield.h> #include <linux/of.h> #include <linux/firmware.h> -#include <linux/crc-ccitt.h> +#include <linux/crc-itu-t.h> #include <linux/nvmem-consumer.h> #include <asm/unaligned.h> @@ -132,7 +132,7 @@ static int aqr_fw_load_memory(struct phy_device *phydev, u32 addr, crc_data[3] = word; /* ...calculate CRC as we load data... */ - crc = crc_ccitt_false(crc, crc_data, sizeof(crc_data)); + crc = crc_itu_t(crc, crc_data, sizeof(crc_data)); } /* ...gets CRC from MAILBOX after we have loaded the entire section... */ up_crc = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE2); @@ -164,7 +164,7 @@ static int aqr_fw_boot(struct phy_device *phydev, const u8 *data, size_t size, phydev_err(phydev, "bad firmware CRC in firmware\n"); return ret; } - calculated_crc = crc_ccitt_false(0, data, size - sizeof(u16)); + calculated_crc = crc_itu_t(0, data, size - sizeof(u16)); if (read_crc != calculated_crc) { phydev_err(phydev, "bad firmware CRC: file 0x%04x calculated 0x%04x\n", read_crc, calculated_crc); diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index a7d28848ee93..19cfbf36fe80 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -462,7 +462,7 @@ static int at803x_set_wol(struct phy_device *phydev, if (!ndev) return -ENODEV; - mac = (const u8 *) ndev->dev_addr; + mac = (const u8 *)ndev->dev_addr; if (!is_valid_ether_addr(mac)) return -EINVAL; @@ -916,9 +916,9 @@ static void at803x_link_change_notify(struct phy_device *phydev) at803x_context_save(phydev, &context); phy_device_reset(phydev, 1); - msleep(1); + usleep_range(1000, 2000); phy_device_reset(phydev, 0); - msleep(1); + usleep_range(1000, 2000); at803x_context_restore(phydev, &context); @@ -1733,7 +1733,7 @@ static int qca83xx_resume(struct phy_device *phydev) if (ret) return ret; - msleep(1); + usleep_range(1000, 2000); return 0; } @@ -1781,27 +1781,27 @@ static int qca808x_phy_fast_retrain_config(struct phy_device *phydev) return ret; phy_write_mmd(phydev, MDIO_MMD_AN, QCA808X_PHY_MMD7_TOP_OPTION1, - QCA808X_TOP_OPTION1_DATA); + QCA808X_TOP_OPTION1_DATA); phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_20DB, - QCA808X_MSE_THRESHOLD_20DB_VALUE); + QCA808X_MSE_THRESHOLD_20DB_VALUE); phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_17DB, - QCA808X_MSE_THRESHOLD_17DB_VALUE); + QCA808X_MSE_THRESHOLD_17DB_VALUE); phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_27DB, - QCA808X_MSE_THRESHOLD_27DB_VALUE); + QCA808X_MSE_THRESHOLD_27DB_VALUE); phy_write_mmd(phydev, MDIO_MMD_PMAPMD, QCA808X_PHY_MMD1_MSE_THRESHOLD_28DB, - QCA808X_MSE_THRESHOLD_28DB_VALUE); + QCA808X_MSE_THRESHOLD_28DB_VALUE); phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_1, - QCA808X_MMD3_DEBUG_1_VALUE); + QCA808X_MMD3_DEBUG_1_VALUE); phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_4, - QCA808X_MMD3_DEBUG_4_VALUE); + QCA808X_MMD3_DEBUG_4_VALUE); phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_5, - QCA808X_MMD3_DEBUG_5_VALUE); + QCA808X_MMD3_DEBUG_5_VALUE); phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_3, - QCA808X_MMD3_DEBUG_3_VALUE); + QCA808X_MMD3_DEBUG_3_VALUE); phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_6, - QCA808X_MMD3_DEBUG_6_VALUE); + QCA808X_MMD3_DEBUG_6_VALUE); phy_write_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_DEBUG_2, - QCA808X_MMD3_DEBUG_2_VALUE); + QCA808X_MMD3_DEBUG_2_VALUE); return 0; } @@ -1838,13 +1838,14 @@ static int qca808x_config_init(struct phy_device *phydev) /* Active adc&vga on 802.3az for the link 1000M and 100M */ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, QCA808X_PHY_MMD3_ADDR_CLD_CTRL7, - QCA808X_8023AZ_AFE_CTRL_MASK, QCA808X_8023AZ_AFE_EN); + QCA808X_8023AZ_AFE_CTRL_MASK, QCA808X_8023AZ_AFE_EN); if (ret) return ret; /* Adjust the threshold on 802.3az for the link 1000M */ ret = phy_write_mmd(phydev, MDIO_MMD_PCS, - QCA808X_PHY_MMD3_AZ_TRAINING_CTRL, QCA808X_MMD3_AZ_TRAINING_VAL); + QCA808X_PHY_MMD3_AZ_TRAINING_CTRL, + QCA808X_MMD3_AZ_TRAINING_VAL); if (ret) return ret; @@ -1870,7 +1871,8 @@ static int qca808x_config_init(struct phy_device *phydev) /* Configure adc threshold as 100mv for the link 10M */ return at803x_debug_reg_mask(phydev, QCA808X_PHY_DEBUG_ADC_THRESHOLD, - QCA808X_ADC_THRESHOLD_MASK, QCA808X_ADC_THRESHOLD_100MV); + QCA808X_ADC_THRESHOLD_MASK, + QCA808X_ADC_THRESHOLD_100MV); } static int qca808x_read_status(struct phy_device *phydev) @@ -1883,7 +1885,7 @@ static int qca808x_read_status(struct phy_device *phydev) return ret; linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->lp_advertising, - ret & MDIO_AN_10GBT_STAT_LP2_5G); + ret & MDIO_AN_10GBT_STAT_LP2_5G); ret = genphy_read_status(phydev); if (ret) @@ -1913,7 +1915,7 @@ static int qca808x_read_status(struct phy_device *phydev) */ if (qca808x_has_fast_retrain_or_slave_seed(phydev)) { if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR || - qca808x_is_prefer_master(phydev)) { + qca808x_is_prefer_master(phydev)) { qca808x_phy_ms_seed_enable(phydev, false); } else { qca808x_phy_ms_seed_enable(phydev, true); @@ -2070,18 +2072,22 @@ static int qca808x_cable_test_get_status(struct phy_device *phydev, bool *finish ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_D, qca808x_cable_test_result_trans(pair_d)); - if (qca808x_cdt_fault_length_valid(pair_a)) - ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_A, - qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_A)); - if (qca808x_cdt_fault_length_valid(pair_b)) - ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_B, - qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_B)); - if (qca808x_cdt_fault_length_valid(pair_c)) - ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_C, - qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_C)); - if (qca808x_cdt_fault_length_valid(pair_d)) - ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_D, - qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_D)); + if (qca808x_cdt_fault_length_valid(pair_a)) { + val = qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_A); + ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_A, val); + } + if (qca808x_cdt_fault_length_valid(pair_b)) { + val = qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_B); + ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_B, val); + } + if (qca808x_cdt_fault_length_valid(pair_c)) { + val = qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_C); + ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_C, val); + } + if (qca808x_cdt_fault_length_valid(pair_d)) { + val = qca808x_cdt_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_D); + ethnl_cable_test_fault_length(phydev, ETHTOOL_A_CABLE_PAIR_D, val); + } *finished = true; @@ -2148,8 +2154,9 @@ static void qca808x_link_change_notify(struct phy_device *phydev) * the interface device address is always phy address added by 1. */ mdiobus_c45_modify_changed(phydev->mdio.bus, phydev->mdio.addr + 1, - MDIO_MMD_PMAPMD, QCA8081_PHY_SERDES_MMD1_FIFO_CTRL, - QCA8081_PHY_FIFO_RSTN, phydev->link ? QCA8081_PHY_FIFO_RSTN : 0); + MDIO_MMD_PMAPMD, QCA8081_PHY_SERDES_MMD1_FIFO_CTRL, + QCA8081_PHY_FIFO_RSTN, + phydev->link ? QCA8081_PHY_FIFO_RSTN : 0); } static struct phy_driver at803x_driver[] = { diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 6cf73c15635b..afbad1ad8683 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -193,6 +193,10 @@ static void mdiobus_release(struct device *d) bus->state != MDIOBUS_ALLOCATED, "%s: not in RELEASED or ALLOCATED state\n", bus->id); + + if (bus->state == MDIOBUS_RELEASED) + fwnode_handle_put(dev_fwnode(d)); + kfree(bus); } @@ -684,6 +688,15 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) bus->dev.groups = NULL; dev_set_name(&bus->dev, "%s", bus->id); + /* If the bus state is allocated, we're registering a fresh bus + * that may have a fwnode associated with it. Grab a reference + * to the fwnode. This will be dropped when the bus is released. + * If the bus was set to unregistered, it means that the bus was + * previously registered, and we've already grabbed a reference. + */ + if (bus->state == MDIOBUS_ALLOCATED) + fwnode_handle_get(dev_fwnode(&bus->dev)); + /* We need to set state to MDIOBUS_UNREGISTERED to correctly release * the device in mdiobus_free() * diff --git a/drivers/net/phy/nxp-c45-tja11xx-macsec.c b/drivers/net/phy/nxp-c45-tja11xx-macsec.c new file mode 100644 index 000000000000..550ef08970f4 --- /dev/null +++ b/drivers/net/phy/nxp-c45-tja11xx-macsec.c @@ -0,0 +1,1729 @@ +// SPDX-License-Identifier: GPL-2.0 +/* NXP C45 PTP PHY driver interface + * Copyright 2023 NXP + * Author: Radu Pirea <radu-nicolae.pirea@oss.nxp.com> + */ + +#include <linux/delay.h> +#include <linux/ethtool_netlink.h> +#include <linux/kernel.h> +#include <linux/mii.h> +#include <linux/module.h> +#include <linux/phy.h> +#include <linux/processor.h> +#include <net/dst_metadata.h> +#include <net/macsec.h> + +#include "nxp-c45-tja11xx.h" + +#define MACSEC_REG_SIZE 32 +#define TX_SC_MAX 4 + +#define TX_SC_BIT(secy_id) BIT(MACSEC_REG_SIZE - (secy_id) - 1) + +#define VEND1_MACSEC_BASE 0x9000 + +#define MACSEC_CFG 0x0000 +#define MACSEC_CFG_BYPASS BIT(1) +#define MACSEC_CFG_S0I BIT(0) + +#define MACSEC_TPNET 0x0044 +#define PN_WRAP_THRESHOLD 0xffffffff + +#define MACSEC_RXSCA 0x0080 +#define MACSEC_RXSCKA 0x0084 + +#define MACSEC_TXSCA 0x00C0 +#define MACSEC_TXSCKA 0x00C4 + +#define MACSEC_RXSC_SCI_1H 0x0100 + +#define MACSEC_RXSC_CFG 0x0128 +#define MACSEC_RXSC_CFG_XPN BIT(25) +#define MACSEC_RXSC_CFG_AES_256 BIT(24) +#define MACSEC_RXSC_CFG_SCI_EN BIT(11) +#define MACSEC_RXSC_CFG_RP BIT(10) +#define MACSEC_RXSC_CFG_VF_MASK GENMASK(9, 8) +#define MACSEC_RXSC_CFG_VF_OFF 8 + +#define MACSEC_RPW 0x012C + +#define MACSEC_RXSA_A_CS 0x0180 +#define MACSEC_RXSA_A_NPN 0x0184 +#define MACSEC_RXSA_A_XNPN 0x0188 +#define MACSEC_RXSA_A_LNPN 0x018C +#define MACSEC_RXSA_A_LXNPN 0x0190 + +#define MACSEC_RXSA_B_CS 0x01C0 +#define MACSEC_RXSA_B_NPN 0x01C4 +#define MACSEC_RXSA_B_XNPN 0x01C8 +#define MACSEC_RXSA_B_LNPN 0x01CC +#define MACSEC_RXSA_B_LXNPN 0x01D0 + +#define MACSEC_RXSA_CS_AN_OFF 1 +#define MACSEC_RXSA_CS_EN BIT(0) + +#define MACSEC_TXSC_SCI_1H 0x0200 +#define MACSEC_TXSC_CFG 0x0228 +#define MACSEC_TXSC_CFG_XPN BIT(25) +#define MACSEC_TXSC_CFG_AES_256 BIT(24) +#define MACSEC_TXSC_CFG_AN_MASK GENMASK(19, 18) +#define MACSEC_TXSC_CFG_AN_OFF 18 +#define MACSEC_TXSC_CFG_ASA BIT(17) +#define MACSEC_TXSC_CFG_SCE BIT(16) +#define MACSEC_TXSC_CFG_ENCRYPT BIT(4) +#define MACSEC_TXSC_CFG_PROTECT BIT(3) +#define MACSEC_TXSC_CFG_SEND_SCI BIT(2) +#define MACSEC_TXSC_CFG_END_STATION BIT(1) +#define MACSEC_TXSC_CFG_SCB BIT(0) + +#define MACSEC_TXSA_A_CS 0x0280 +#define MACSEC_TXSA_A_NPN 0x0284 +#define MACSEC_TXSA_A_XNPN 0x0288 + +#define MACSEC_TXSA_B_CS 0x02C0 +#define MACSEC_TXSA_B_NPN 0x02C4 +#define MACSEC_TXSA_B_XNPN 0x02C8 + +#define MACSEC_SA_CS_A BIT(31) + +#define MACSEC_EVR 0x0400 +#define MACSEC_EVER 0x0404 + +#define MACSEC_RXSA_A_KA 0x0700 +#define MACSEC_RXSA_A_SSCI 0x0720 +#define MACSEC_RXSA_A_SALT 0x0724 + +#define MACSEC_RXSA_B_KA 0x0740 +#define MACSEC_RXSA_B_SSCI 0x0760 +#define MACSEC_RXSA_B_SALT 0x0764 + +#define MACSEC_TXSA_A_KA 0x0780 +#define MACSEC_TXSA_A_SSCI 0x07A0 +#define MACSEC_TXSA_A_SALT 0x07A4 + +#define MACSEC_TXSA_B_KA 0x07C0 +#define MACSEC_TXSA_B_SSCI 0x07E0 +#define MACSEC_TXSA_B_SALT 0x07E4 + +#define MACSEC_UPFR0D2 0x0A08 +#define MACSEC_UPFR0M1 0x0A10 +#define MACSEC_OVP BIT(12) + +#define MACSEC_UPFR0M2 0x0A14 +#define ETYPE_MASK 0xffff + +#define MACSEC_UPFR0R 0x0A18 +#define MACSEC_UPFR_EN BIT(0) + +#define ADPTR_CNTRL 0x0F00 +#define ADPTR_CNTRL_CONFIG_EN BIT(14) +#define ADPTR_CNTRL_ADPTR_EN BIT(12) +#define ADPTR_TX_TAG_CNTRL 0x0F0C +#define ADPTR_TX_TAG_CNTRL_ENA BIT(31) + +#define TX_SC_FLT_BASE 0x800 +#define TX_SC_FLT_SIZE 0x10 +#define TX_FLT_BASE(flt_id) (TX_SC_FLT_BASE + \ + TX_SC_FLT_SIZE * (flt_id)) + +#define TX_SC_FLT_OFF_MAC_DA_SA 0x04 +#define TX_SC_FLT_OFF_MAC_SA 0x08 +#define TX_SC_FLT_OFF_MAC_CFG 0x0C +#define TX_SC_FLT_BY_SA BIT(14) +#define TX_SC_FLT_EN BIT(8) + +#define TX_SC_FLT_MAC_DA_SA(base) ((base) + TX_SC_FLT_OFF_MAC_DA_SA) +#define TX_SC_FLT_MAC_SA(base) ((base) + TX_SC_FLT_OFF_MAC_SA) +#define TX_SC_FLT_MAC_CFG(base) ((base) + TX_SC_FLT_OFF_MAC_CFG) + +#define ADAPTER_EN BIT(6) +#define MACSEC_EN BIT(5) + +#define MACSEC_INOV1HS 0x0140 +#define MACSEC_INOV2HS 0x0144 +#define MACSEC_INOD1HS 0x0148 +#define MACSEC_INOD2HS 0x014C +#define MACSEC_RXSCIPUS 0x0150 +#define MACSEC_RXSCIPDS 0x0154 +#define MACSEC_RXSCIPLS 0x0158 +#define MACSEC_RXAN0INUSS 0x0160 +#define MACSEC_RXAN0IPUSS 0x0170 +#define MACSEC_RXSA_A_IPOS 0x0194 +#define MACSEC_RXSA_A_IPIS 0x01B0 +#define MACSEC_RXSA_A_IPNVS 0x01B4 +#define MACSEC_RXSA_B_IPOS 0x01D4 +#define MACSEC_RXSA_B_IPIS 0x01F0 +#define MACSEC_RXSA_B_IPNVS 0x01F4 +#define MACSEC_OPUS 0x021C +#define MACSEC_OPTLS 0x022C +#define MACSEC_OOP1HS 0x0240 +#define MACSEC_OOP2HS 0x0244 +#define MACSEC_OOE1HS 0x0248 +#define MACSEC_OOE2HS 0x024C +#define MACSEC_TXSA_A_OPPS 0x028C +#define MACSEC_TXSA_A_OPES 0x0290 +#define MACSEC_TXSA_B_OPPS 0x02CC +#define MACSEC_TXSA_B_OPES 0x02D0 +#define MACSEC_INPWTS 0x0630 +#define MACSEC_INPBTS 0x0638 +#define MACSEC_IPSNFS 0x063C + +#define TJA11XX_TLV_TX_NEEDED_HEADROOM (32) +#define TJA11XX_TLV_NEEDED_TAILROOM (0) + +#define ETH_P_TJA11XX_TLV (0x4e58) + +enum nxp_c45_sa_type { + TX_SA, + RX_SA, +}; + +struct nxp_c45_sa { + void *sa; + const struct nxp_c45_sa_regs *regs; + enum nxp_c45_sa_type type; + bool is_key_a; + u8 an; + struct list_head list; +}; + +struct nxp_c45_secy { + struct macsec_secy *secy; + struct macsec_rx_sc *rx_sc; + struct list_head sa_list; + int secy_id; + bool rx_sc0_impl; + struct list_head list; +}; + +struct nxp_c45_macsec { + struct list_head secy_list; + DECLARE_BITMAP(secy_bitmap, TX_SC_MAX); + DECLARE_BITMAP(tx_sc_bitmap, TX_SC_MAX); +}; + +struct nxp_c45_sa_regs { + u16 cs; + u16 npn; + u16 xnpn; + u16 lnpn; + u16 lxnpn; + u16 ka; + u16 ssci; + u16 salt; + u16 ipis; + u16 ipnvs; + u16 ipos; + u16 opps; + u16 opes; +}; + +static const struct nxp_c45_sa_regs rx_sa_a_regs = { + .cs = MACSEC_RXSA_A_CS, + .npn = MACSEC_RXSA_A_NPN, + .xnpn = MACSEC_RXSA_A_XNPN, + .lnpn = MACSEC_RXSA_A_LNPN, + .lxnpn = MACSEC_RXSA_A_LXNPN, + .ka = MACSEC_RXSA_A_KA, + .ssci = MACSEC_RXSA_A_SSCI, + .salt = MACSEC_RXSA_A_SALT, + .ipis = MACSEC_RXSA_A_IPIS, + .ipnvs = MACSEC_RXSA_A_IPNVS, + .ipos = MACSEC_RXSA_A_IPOS, +}; + +static const struct nxp_c45_sa_regs rx_sa_b_regs = { + .cs = MACSEC_RXSA_B_CS, + .npn = MACSEC_RXSA_B_NPN, + .xnpn = MACSEC_RXSA_B_XNPN, + .lnpn = MACSEC_RXSA_B_LNPN, + .lxnpn = MACSEC_RXSA_B_LXNPN, + .ka = MACSEC_RXSA_B_KA, + .ssci = MACSEC_RXSA_B_SSCI, + .salt = MACSEC_RXSA_B_SALT, + .ipis = MACSEC_RXSA_B_IPIS, + .ipnvs = MACSEC_RXSA_B_IPNVS, + .ipos = MACSEC_RXSA_B_IPOS, +}; + +static const struct nxp_c45_sa_regs tx_sa_a_regs = { + .cs = MACSEC_TXSA_A_CS, + .npn = MACSEC_TXSA_A_NPN, + .xnpn = MACSEC_TXSA_A_XNPN, + .ka = MACSEC_TXSA_A_KA, + .ssci = MACSEC_TXSA_A_SSCI, + .salt = MACSEC_TXSA_A_SALT, + .opps = MACSEC_TXSA_A_OPPS, + .opes = MACSEC_TXSA_A_OPES, +}; + +static const struct nxp_c45_sa_regs tx_sa_b_regs = { + .cs = MACSEC_TXSA_B_CS, + .npn = MACSEC_TXSA_B_NPN, + .xnpn = MACSEC_TXSA_B_XNPN, + .ka = MACSEC_TXSA_B_KA, + .ssci = MACSEC_TXSA_B_SSCI, + .salt = MACSEC_TXSA_B_SALT, + .opps = MACSEC_TXSA_B_OPPS, + .opes = MACSEC_TXSA_B_OPES, +}; + +static const +struct nxp_c45_sa_regs *nxp_c45_sa_regs_get(enum nxp_c45_sa_type sa_type, + bool key_a) +{ + if (sa_type == RX_SA) + if (key_a) + return &rx_sa_a_regs; + else + return &rx_sa_b_regs; + else if (sa_type == TX_SA) + if (key_a) + return &tx_sa_a_regs; + else + return &tx_sa_b_regs; + else + return NULL; +} + +static int nxp_c45_macsec_write(struct phy_device *phydev, u16 addr, u32 value) +{ + u32 lvalue = value; + u16 laddr; + int ret; + + WARN_ON_ONCE(addr % 4); + + phydev_dbg(phydev, "write addr 0x%x value 0x%x\n", addr, value); + + laddr = VEND1_MACSEC_BASE + addr / 2; + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, laddr, lvalue); + if (ret) + return ret; + + laddr += 1; + lvalue >>= 16; + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, laddr, lvalue); + + return ret; +} + +static int nxp_c45_macsec_read(struct phy_device *phydev, u16 addr, u32 *value) +{ + u32 lvalue; + u16 laddr; + int ret; + + WARN_ON_ONCE(addr % 4); + + laddr = VEND1_MACSEC_BASE + addr / 2; + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, laddr); + if (ret < 0) + return ret; + + laddr += 1; + lvalue = (u32)ret & 0xffff; + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, laddr); + if (ret < 0) + return ret; + + lvalue |= (u32)ret << 16; + *value = lvalue; + + phydev_dbg(phydev, "read addr 0x%x value 0x%x\n", addr, *value); + + return 0; +} + +static void nxp_c45_macsec_read32_64(struct phy_device *phydev, u16 addr, + u64 *value) +{ + u32 lvalue; + + nxp_c45_macsec_read(phydev, addr, &lvalue); + *value = lvalue; +} + +static void nxp_c45_macsec_read64(struct phy_device *phydev, u16 addr, + u64 *value) +{ + u32 lvalue; + + nxp_c45_macsec_read(phydev, addr, &lvalue); + *value = (u64)lvalue << 32; + nxp_c45_macsec_read(phydev, addr + 4, &lvalue); + *value |= lvalue; +} + +static void nxp_c45_secy_irq_en(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy, bool en) +{ + u32 reg; + + nxp_c45_macsec_read(phydev, MACSEC_EVER, ®); + if (en) + reg |= TX_SC_BIT(phy_secy->secy_id); + else + reg &= ~TX_SC_BIT(phy_secy->secy_id); + nxp_c45_macsec_write(phydev, MACSEC_EVER, reg); +} + +static struct nxp_c45_secy *nxp_c45_find_secy(struct list_head *secy_list, + sci_t sci) +{ + struct nxp_c45_secy *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, secy_list, list) + if (pos->secy->sci == sci) + return pos; + + return ERR_PTR(-EINVAL); +} + +static struct +nxp_c45_secy *nxp_c45_find_secy_by_id(struct list_head *secy_list, + int id) +{ + struct nxp_c45_secy *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, secy_list, list) + if (pos->secy_id == id) + return pos; + + return ERR_PTR(-EINVAL); +} + +static void nxp_c45_secy_free(struct nxp_c45_secy *phy_secy) +{ + list_del(&phy_secy->list); + kfree(phy_secy); +} + +static struct nxp_c45_sa *nxp_c45_find_sa(struct list_head *sa_list, + enum nxp_c45_sa_type sa_type, u8 an) +{ + struct nxp_c45_sa *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, sa_list, list) + if (pos->an == an && pos->type == sa_type) + return pos; + + return ERR_PTR(-EINVAL); +} + +static struct nxp_c45_sa *nxp_c45_sa_alloc(struct list_head *sa_list, void *sa, + enum nxp_c45_sa_type sa_type, u8 an) +{ + struct nxp_c45_sa *first = NULL, *pos, *tmp; + int occurrences = 0; + + list_for_each_entry_safe(pos, tmp, sa_list, list) { + if (pos->type != sa_type) + continue; + + if (pos->an == an) + return ERR_PTR(-EINVAL); + + first = pos; + occurrences++; + if (occurrences >= 2) + return ERR_PTR(-ENOSPC); + } + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + + if (first) + tmp->is_key_a = !first->is_key_a; + else + tmp->is_key_a = true; + + tmp->sa = sa; + tmp->type = sa_type; + tmp->an = an; + tmp->regs = nxp_c45_sa_regs_get(tmp->type, tmp->is_key_a); + list_add_tail(&tmp->list, sa_list); + + return tmp; +} + +static void nxp_c45_sa_free(struct nxp_c45_sa *sa) +{ + list_del(&sa->list); + kfree(sa); +} + +static void nxp_c45_sa_list_free(struct list_head *sa_list) +{ + struct nxp_c45_sa *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, sa_list, list) + nxp_c45_sa_free(pos); +} + +static void nxp_c45_sa_set_pn(struct phy_device *phydev, + struct nxp_c45_sa *sa, u64 pn, + u32 replay_window) +{ + const struct nxp_c45_sa_regs *sa_regs = sa->regs; + pn_t npn = {.full64 = pn}; + pn_t lnpn; + + nxp_c45_macsec_write(phydev, sa_regs->npn, npn.lower); + nxp_c45_macsec_write(phydev, sa_regs->xnpn, npn.upper); + if (sa->type != RX_SA) + return; + + if (pn > replay_window) + lnpn.full64 = pn - replay_window; + else + lnpn.full64 = 1; + + nxp_c45_macsec_write(phydev, sa_regs->lnpn, lnpn.lower); + nxp_c45_macsec_write(phydev, sa_regs->lxnpn, lnpn.upper); +} + +static void nxp_c45_sa_set_key(struct macsec_context *ctx, + const struct nxp_c45_sa_regs *sa_regs, + u8 *salt, ssci_t ssci) +{ + struct phy_device *phydev = ctx->phydev; + u32 key_size = ctx->secy->key_len / 4; + u32 salt_size = MACSEC_SALT_LEN / 4; + u32 *key_u32 = (u32 *)ctx->sa.key; + u32 *salt_u32 = (u32 *)salt; + u32 reg, value; + int i; + + for (i = 0; i < key_size; i++) { + reg = sa_regs->ka + i * 4; + value = (__force u32)cpu_to_be32(key_u32[i]); + nxp_c45_macsec_write(phydev, reg, value); + } + + if (ctx->secy->xpn) { + for (i = 0; i < salt_size; i++) { + reg = sa_regs->salt + (2 - i) * 4; + value = (__force u32)cpu_to_be32(salt_u32[i]); + nxp_c45_macsec_write(phydev, reg, value); + } + + value = (__force u32)cpu_to_be32((__force u32)ssci); + nxp_c45_macsec_write(phydev, sa_regs->ssci, value); + } + + nxp_c45_macsec_write(phydev, sa_regs->cs, MACSEC_SA_CS_A); +} + +static void nxp_c45_rx_sa_clear_stats(struct phy_device *phydev, + struct nxp_c45_sa *sa) +{ + nxp_c45_macsec_write(phydev, sa->regs->ipis, 0); + nxp_c45_macsec_write(phydev, sa->regs->ipnvs, 0); + nxp_c45_macsec_write(phydev, sa->regs->ipos, 0); + + nxp_c45_macsec_write(phydev, MACSEC_RXAN0INUSS + sa->an * 4, 0); + nxp_c45_macsec_write(phydev, MACSEC_RXAN0IPUSS + sa->an * 4, 0); +} + +static void nxp_c45_rx_sa_read_stats(struct phy_device *phydev, + struct nxp_c45_sa *sa, + struct macsec_rx_sa_stats *stats) +{ + nxp_c45_macsec_read(phydev, sa->regs->ipis, &stats->InPktsInvalid); + nxp_c45_macsec_read(phydev, sa->regs->ipnvs, &stats->InPktsNotValid); + nxp_c45_macsec_read(phydev, sa->regs->ipos, &stats->InPktsOK); +} + +static void nxp_c45_tx_sa_clear_stats(struct phy_device *phydev, + struct nxp_c45_sa *sa) +{ + nxp_c45_macsec_write(phydev, sa->regs->opps, 0); + nxp_c45_macsec_write(phydev, sa->regs->opes, 0); +} + +static void nxp_c45_tx_sa_read_stats(struct phy_device *phydev, + struct nxp_c45_sa *sa, + struct macsec_tx_sa_stats *stats) +{ + nxp_c45_macsec_read(phydev, sa->regs->opps, &stats->OutPktsProtected); + nxp_c45_macsec_read(phydev, sa->regs->opes, &stats->OutPktsEncrypted); +} + +static void nxp_c45_rx_sa_update(struct phy_device *phydev, + struct nxp_c45_sa *sa, bool en) +{ + const struct nxp_c45_sa_regs *sa_regs = sa->regs; + u32 cfg; + + cfg = sa->an << MACSEC_RXSA_CS_AN_OFF; + cfg |= en ? MACSEC_RXSA_CS_EN : 0; + nxp_c45_macsec_write(phydev, sa_regs->cs, cfg); +} + +static void nxp_c45_tx_sa_update(struct phy_device *phydev, + struct nxp_c45_sa *sa, bool en) +{ + u32 cfg = 0; + + nxp_c45_macsec_read(phydev, MACSEC_TXSC_CFG, &cfg); + + cfg &= ~MACSEC_TXSC_CFG_AN_MASK; + cfg |= sa->an << MACSEC_TXSC_CFG_AN_OFF; + + if (sa->is_key_a) + cfg &= ~MACSEC_TXSC_CFG_ASA; + else + cfg |= MACSEC_TXSC_CFG_ASA; + + if (en) + cfg |= MACSEC_TXSC_CFG_SCE; + else + cfg &= ~MACSEC_TXSC_CFG_SCE; + + nxp_c45_macsec_write(phydev, MACSEC_TXSC_CFG, cfg); +} + +static void nxp_c45_set_sci(struct phy_device *phydev, u16 sci_base_addr, + sci_t sci) +{ + u64 lsci = sci_to_cpu(sci); + + nxp_c45_macsec_write(phydev, sci_base_addr, lsci >> 32); + nxp_c45_macsec_write(phydev, sci_base_addr + 4, lsci); +} + +static bool nxp_c45_port_is_1(sci_t sci) +{ + u16 port = sci_to_cpu(sci); + + return port == 1; +} + +static void nxp_c45_select_secy(struct phy_device *phydev, u8 id) +{ + nxp_c45_macsec_write(phydev, MACSEC_RXSCA, id); + nxp_c45_macsec_write(phydev, MACSEC_RXSCKA, id); + nxp_c45_macsec_write(phydev, MACSEC_TXSCA, id); + nxp_c45_macsec_write(phydev, MACSEC_TXSCKA, id); +} + +static bool nxp_c45_secy_valid(struct nxp_c45_secy *phy_secy, + bool can_rx_sc0_impl) +{ + bool end_station = phy_secy->secy->tx_sc.end_station; + bool scb = phy_secy->secy->tx_sc.scb; + + phy_secy->rx_sc0_impl = false; + + if (end_station) { + if (!nxp_c45_port_is_1(phy_secy->secy->sci)) + return false; + if (!phy_secy->rx_sc) + return true; + return nxp_c45_port_is_1(phy_secy->rx_sc->sci); + } + + if (scb) + return false; + + if (!can_rx_sc0_impl) + return false; + + if (phy_secy->secy_id != 0) + return false; + + phy_secy->rx_sc0_impl = true; + + return true; +} + +static bool nxp_c45_rx_sc0_impl(struct nxp_c45_secy *phy_secy) +{ + bool end_station = phy_secy->secy->tx_sc.end_station; + bool send_sci = phy_secy->secy->tx_sc.send_sci; + bool scb = phy_secy->secy->tx_sc.scb; + + return !end_station && !send_sci && !scb; +} + +static bool nxp_c45_mac_addr_free(struct macsec_context *ctx) +{ + struct nxp_c45_phy *priv = ctx->phydev->priv; + struct nxp_c45_secy *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &priv->macsec->secy_list, list) { + if (pos->secy == ctx->secy) + continue; + + if (memcmp(pos->secy->netdev->dev_addr, + ctx->secy->netdev->dev_addr, ETH_ALEN) == 0) + return false; + } + + return true; +} + +static void nxp_c45_tx_sc_en_flt(struct phy_device *phydev, int secy_id, + bool en) +{ + u32 tx_flt_base = TX_FLT_BASE(secy_id); + u32 reg = 0; + + nxp_c45_macsec_read(phydev, TX_SC_FLT_MAC_CFG(tx_flt_base), ®); + if (en) + reg |= TX_SC_FLT_EN; + else + reg &= ~TX_SC_FLT_EN; + nxp_c45_macsec_write(phydev, TX_SC_FLT_MAC_CFG(tx_flt_base), reg); +} + +static void nxp_c45_tx_sc_set_flt(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy) +{ + const u8 *dev_addr = phy_secy->secy->netdev->dev_addr; + u32 tx_flt_base = TX_FLT_BASE(phy_secy->secy_id); + u32 reg; + + reg = dev_addr[0] << 8 | dev_addr[1]; + nxp_c45_macsec_write(phydev, TX_SC_FLT_MAC_DA_SA(tx_flt_base), reg); + reg = dev_addr[5] | dev_addr[4] << 8 | dev_addr[3] << 16 | + dev_addr[2] << 24; + + nxp_c45_macsec_write(phydev, TX_SC_FLT_MAC_SA(tx_flt_base), reg); + nxp_c45_macsec_read(phydev, TX_SC_FLT_MAC_CFG(tx_flt_base), ®); + reg &= TX_SC_FLT_EN; + reg |= TX_SC_FLT_BY_SA | phy_secy->secy_id; + nxp_c45_macsec_write(phydev, TX_SC_FLT_MAC_CFG(tx_flt_base), reg); +} + +static void nxp_c45_tx_sc_update(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy) +{ + u32 cfg = 0; + + nxp_c45_macsec_read(phydev, MACSEC_TXSC_CFG, &cfg); + + phydev_dbg(phydev, "XPN %s\n", phy_secy->secy->xpn ? "on" : "off"); + if (phy_secy->secy->xpn) + cfg |= MACSEC_TXSC_CFG_XPN; + else + cfg &= ~MACSEC_TXSC_CFG_XPN; + + phydev_dbg(phydev, "key len %u\n", phy_secy->secy->key_len); + if (phy_secy->secy->key_len == 32) + cfg |= MACSEC_TXSC_CFG_AES_256; + else + cfg &= ~MACSEC_TXSC_CFG_AES_256; + + phydev_dbg(phydev, "encryption %s\n", + phy_secy->secy->tx_sc.encrypt ? "on" : "off"); + if (phy_secy->secy->tx_sc.encrypt) + cfg |= MACSEC_TXSC_CFG_ENCRYPT; + else + cfg &= ~MACSEC_TXSC_CFG_ENCRYPT; + + phydev_dbg(phydev, "protect frames %s\n", + phy_secy->secy->protect_frames ? "on" : "off"); + if (phy_secy->secy->protect_frames) + cfg |= MACSEC_TXSC_CFG_PROTECT; + else + cfg &= ~MACSEC_TXSC_CFG_PROTECT; + + phydev_dbg(phydev, "send sci %s\n", + phy_secy->secy->tx_sc.send_sci ? "on" : "off"); + if (phy_secy->secy->tx_sc.send_sci) + cfg |= MACSEC_TXSC_CFG_SEND_SCI; + else + cfg &= ~MACSEC_TXSC_CFG_SEND_SCI; + + phydev_dbg(phydev, "end station %s\n", + phy_secy->secy->tx_sc.end_station ? "on" : "off"); + if (phy_secy->secy->tx_sc.end_station) + cfg |= MACSEC_TXSC_CFG_END_STATION; + else + cfg &= ~MACSEC_TXSC_CFG_END_STATION; + + phydev_dbg(phydev, "scb %s\n", + phy_secy->secy->tx_sc.scb ? "on" : "off"); + if (phy_secy->secy->tx_sc.scb) + cfg |= MACSEC_TXSC_CFG_SCB; + else + cfg &= ~MACSEC_TXSC_CFG_SCB; + + nxp_c45_macsec_write(phydev, MACSEC_TXSC_CFG, cfg); +} + +static void nxp_c45_tx_sc_clear_stats(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy) +{ + struct nxp_c45_sa *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &phy_secy->sa_list, list) + if (pos->type == TX_SA) + nxp_c45_tx_sa_clear_stats(phydev, pos); + + nxp_c45_macsec_write(phydev, MACSEC_OPUS, 0); + nxp_c45_macsec_write(phydev, MACSEC_OPTLS, 0); + nxp_c45_macsec_write(phydev, MACSEC_OOP1HS, 0); + nxp_c45_macsec_write(phydev, MACSEC_OOP2HS, 0); + nxp_c45_macsec_write(phydev, MACSEC_OOE1HS, 0); + nxp_c45_macsec_write(phydev, MACSEC_OOE2HS, 0); +} + +static void nxp_c45_set_rx_sc0_impl(struct phy_device *phydev, + bool enable) +{ + u32 reg = 0; + + nxp_c45_macsec_read(phydev, MACSEC_CFG, ®); + if (enable) + reg |= MACSEC_CFG_S0I; + else + reg &= ~MACSEC_CFG_S0I; + nxp_c45_macsec_write(phydev, MACSEC_CFG, reg); +} + +static bool nxp_c45_is_rx_sc0_impl(struct list_head *secy_list) +{ + struct nxp_c45_secy *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, secy_list, list) + if (pos->rx_sc0_impl) + return pos->rx_sc0_impl; + + return false; +} + +static void nxp_c45_rx_sc_en(struct phy_device *phydev, + struct macsec_rx_sc *rx_sc, bool en) +{ + u32 reg = 0; + + nxp_c45_macsec_read(phydev, MACSEC_RXSC_CFG, ®); + if (rx_sc->active && en) + reg |= MACSEC_RXSC_CFG_SCI_EN; + else + reg &= ~MACSEC_RXSC_CFG_SCI_EN; + nxp_c45_macsec_write(phydev, MACSEC_RXSC_CFG, reg); +} + +static void nxp_c45_rx_sc_update(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy) +{ + struct macsec_rx_sc *rx_sc = phy_secy->rx_sc; + struct nxp_c45_phy *priv = phydev->priv; + u32 cfg = 0; + + nxp_c45_macsec_read(phydev, MACSEC_RXSC_CFG, &cfg); + cfg &= ~MACSEC_RXSC_CFG_VF_MASK; + cfg = phy_secy->secy->validate_frames << MACSEC_RXSC_CFG_VF_OFF; + + phydev_dbg(phydev, "validate frames %u\n", + phy_secy->secy->validate_frames); + phydev_dbg(phydev, "replay_protect %s window %u\n", + phy_secy->secy->replay_protect ? "on" : "off", + phy_secy->secy->replay_window); + if (phy_secy->secy->replay_protect) { + cfg |= MACSEC_RXSC_CFG_RP; + nxp_c45_macsec_write(phydev, MACSEC_RPW, + phy_secy->secy->replay_window); + } else { + cfg &= ~MACSEC_RXSC_CFG_RP; + } + + phydev_dbg(phydev, "rx_sc->active %s\n", + rx_sc->active ? "on" : "off"); + if (rx_sc->active && + test_bit(phy_secy->secy_id, priv->macsec->secy_bitmap)) + cfg |= MACSEC_RXSC_CFG_SCI_EN; + else + cfg &= ~MACSEC_RXSC_CFG_SCI_EN; + + phydev_dbg(phydev, "key len %u\n", phy_secy->secy->key_len); + if (phy_secy->secy->key_len == 32) + cfg |= MACSEC_RXSC_CFG_AES_256; + else + cfg &= ~MACSEC_RXSC_CFG_AES_256; + + phydev_dbg(phydev, "XPN %s\n", phy_secy->secy->xpn ? "on" : "off"); + if (phy_secy->secy->xpn) + cfg |= MACSEC_RXSC_CFG_XPN; + else + cfg &= ~MACSEC_RXSC_CFG_XPN; + + nxp_c45_macsec_write(phydev, MACSEC_RXSC_CFG, cfg); +} + +static void nxp_c45_rx_sc_clear_stats(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy) +{ + struct nxp_c45_sa *pos, *tmp; + int i; + + list_for_each_entry_safe(pos, tmp, &phy_secy->sa_list, list) + if (pos->type == RX_SA) + nxp_c45_rx_sa_clear_stats(phydev, pos); + + nxp_c45_macsec_write(phydev, MACSEC_INOD1HS, 0); + nxp_c45_macsec_write(phydev, MACSEC_INOD2HS, 0); + + nxp_c45_macsec_write(phydev, MACSEC_INOV1HS, 0); + nxp_c45_macsec_write(phydev, MACSEC_INOV2HS, 0); + + nxp_c45_macsec_write(phydev, MACSEC_RXSCIPDS, 0); + nxp_c45_macsec_write(phydev, MACSEC_RXSCIPLS, 0); + nxp_c45_macsec_write(phydev, MACSEC_RXSCIPUS, 0); + + for (i = 0; i < MACSEC_NUM_AN; i++) { + nxp_c45_macsec_write(phydev, MACSEC_RXAN0INUSS + i * 4, 0); + nxp_c45_macsec_write(phydev, MACSEC_RXAN0IPUSS + i * 4, 0); + } +} + +static void nxp_c45_rx_sc_del(struct phy_device *phydev, + struct nxp_c45_secy *phy_secy) +{ + struct nxp_c45_sa *pos, *tmp; + + nxp_c45_macsec_write(phydev, MACSEC_RXSC_CFG, 0); + nxp_c45_macsec_write(phydev, MACSEC_RPW, 0); + nxp_c45_set_sci(phydev, MACSEC_RXSC_SCI_1H, 0); + + nxp_c45_rx_sc_clear_stats(phydev, phy_secy); + + list_for_each_entry_safe(pos, tmp, &phy_secy->sa_list, list) { + if (pos->type == RX_SA) { + nxp_c45_rx_sa_update(phydev, pos, false); + nxp_c45_sa_free(pos); + } + } +} + +static void nxp_c45_clear_global_stats(struct phy_device *phydev) +{ + nxp_c45_macsec_write(phydev, MACSEC_INPBTS, 0); + nxp_c45_macsec_write(phydev, MACSEC_INPWTS, 0); + nxp_c45_macsec_write(phydev, MACSEC_IPSNFS, 0); +} + +static void nxp_c45_macsec_en(struct phy_device *phydev, bool en) +{ + u32 reg; + + nxp_c45_macsec_read(phydev, MACSEC_CFG, ®); + if (en) + reg |= MACSEC_CFG_BYPASS; + else + reg &= ~MACSEC_CFG_BYPASS; + nxp_c45_macsec_write(phydev, MACSEC_CFG, reg); +} + +static int nxp_c45_mdo_dev_open(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + int any_bit_set; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + nxp_c45_tx_sc_en_flt(phydev, phy_secy->secy_id, true); + nxp_c45_set_rx_sc0_impl(phydev, phy_secy->rx_sc0_impl); + if (phy_secy->rx_sc) + nxp_c45_rx_sc_en(phydev, phy_secy->rx_sc, true); + + any_bit_set = find_first_bit(priv->macsec->secy_bitmap, TX_SC_MAX); + if (any_bit_set == TX_SC_MAX) + nxp_c45_macsec_en(phydev, true); + + set_bit(phy_secy->secy_id, priv->macsec->secy_bitmap); + + return 0; +} + +static int nxp_c45_mdo_dev_stop(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + int any_bit_set; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + nxp_c45_tx_sc_en_flt(phydev, phy_secy->secy_id, false); + if (phy_secy->rx_sc) + nxp_c45_rx_sc_en(phydev, phy_secy->rx_sc, false); + nxp_c45_set_rx_sc0_impl(phydev, false); + + clear_bit(phy_secy->secy_id, priv->macsec->secy_bitmap); + any_bit_set = find_first_bit(priv->macsec->secy_bitmap, TX_SC_MAX); + if (any_bit_set == TX_SC_MAX) + nxp_c45_macsec_en(phydev, false); + + return 0; +} + +static int nxp_c45_mdo_add_secy(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + bool can_rx_sc0_impl; + int idx; + + phydev_dbg(phydev, "add SecY SCI %016llx\n", + sci_to_cpu(ctx->secy->sci)); + + if (!nxp_c45_mac_addr_free(ctx)) + return -EBUSY; + + if (nxp_c45_is_rx_sc0_impl(&priv->macsec->secy_list)) + return -EBUSY; + + idx = find_first_zero_bit(priv->macsec->tx_sc_bitmap, TX_SC_MAX); + if (idx == TX_SC_MAX) + return -ENOSPC; + + phy_secy = kzalloc(sizeof(*phy_secy), GFP_KERNEL); + if (!phy_secy) + return -ENOMEM; + + INIT_LIST_HEAD(&phy_secy->sa_list); + phy_secy->secy = ctx->secy; + phy_secy->secy_id = idx; + + /* If the point to point mode should be enabled, we should have no + * SecY added yet. + */ + can_rx_sc0_impl = list_count_nodes(&priv->macsec->secy_list) == 0; + if (!nxp_c45_secy_valid(phy_secy, can_rx_sc0_impl)) { + kfree(phy_secy); + return -EINVAL; + } + + phy_secy->rx_sc0_impl = nxp_c45_rx_sc0_impl(phy_secy); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_set_sci(phydev, MACSEC_TXSC_SCI_1H, ctx->secy->sci); + nxp_c45_tx_sc_set_flt(phydev, phy_secy); + nxp_c45_tx_sc_update(phydev, phy_secy); + if (phy_interrupt_is_valid(phydev)) + nxp_c45_secy_irq_en(phydev, phy_secy, true); + + set_bit(idx, priv->macsec->tx_sc_bitmap); + list_add_tail(&phy_secy->list, &priv->macsec->secy_list); + + return 0; +} + +static void nxp_c45_tx_sa_next(struct nxp_c45_secy *phy_secy, + struct nxp_c45_sa *next_sa, u8 encoding_sa) +{ + struct nxp_c45_sa *sa; + + sa = nxp_c45_find_sa(&phy_secy->sa_list, TX_SA, encoding_sa); + if (!IS_ERR(sa)) { + memcpy(next_sa, sa, sizeof(*sa)); + } else { + next_sa->is_key_a = true; + next_sa->an = encoding_sa; + } +} + +static int nxp_c45_mdo_upd_secy(struct macsec_context *ctx) +{ + u8 encoding_sa = ctx->secy->tx_sc.encoding_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + struct nxp_c45_sa next_sa; + bool can_rx_sc0_impl; + + phydev_dbg(phydev, "update SecY SCI %016llx\n", + sci_to_cpu(ctx->secy->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + if (!nxp_c45_mac_addr_free(ctx)) + return -EBUSY; + + /* If the point to point mode should be enabled, we should have only + * one SecY added, respectively the updated one. + */ + can_rx_sc0_impl = list_count_nodes(&priv->macsec->secy_list) == 1; + if (!nxp_c45_secy_valid(phy_secy, can_rx_sc0_impl)) + return -EINVAL; + phy_secy->rx_sc0_impl = nxp_c45_rx_sc0_impl(phy_secy); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_tx_sc_set_flt(phydev, phy_secy); + nxp_c45_tx_sc_update(phydev, phy_secy); + nxp_c45_tx_sa_next(phy_secy, &next_sa, encoding_sa); + nxp_c45_tx_sa_update(phydev, &next_sa, ctx->secy->operational); + + nxp_c45_set_rx_sc0_impl(phydev, phy_secy->rx_sc0_impl); + if (phy_secy->rx_sc) + nxp_c45_rx_sc_update(phydev, phy_secy); + + return 0; +} + +static int nxp_c45_mdo_del_secy(struct macsec_context *ctx) +{ + u8 encoding_sa = ctx->secy->tx_sc.encoding_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + struct nxp_c45_sa next_sa; + + phydev_dbg(phydev, "delete SecY SCI %016llx\n", + sci_to_cpu(ctx->secy->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + nxp_c45_mdo_dev_stop(ctx); + nxp_c45_tx_sa_next(phy_secy, &next_sa, encoding_sa); + nxp_c45_tx_sa_update(phydev, &next_sa, false); + nxp_c45_tx_sc_clear_stats(phydev, phy_secy); + if (phy_secy->rx_sc) + nxp_c45_rx_sc_del(phydev, phy_secy); + + nxp_c45_sa_list_free(&phy_secy->sa_list); + if (phy_interrupt_is_valid(phydev)) + nxp_c45_secy_irq_en(phydev, phy_secy, false); + + clear_bit(phy_secy->secy_id, priv->macsec->tx_sc_bitmap); + nxp_c45_secy_free(phy_secy); + + if (list_empty(&priv->macsec->secy_list)) + nxp_c45_clear_global_stats(phydev); + + return 0; +} + +static int nxp_c45_mdo_add_rxsc(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + + phydev_dbg(phydev, "add RX SC SCI %016llx %s\n", + sci_to_cpu(ctx->rx_sc->sci), + ctx->rx_sc->active ? "enabled" : "disabled"); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + if (phy_secy->rx_sc) + return -ENOSPC; + + if (phy_secy->secy->tx_sc.end_station && + !nxp_c45_port_is_1(ctx->rx_sc->sci)) + return -EINVAL; + + phy_secy->rx_sc = ctx->rx_sc; + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_set_sci(phydev, MACSEC_RXSC_SCI_1H, ctx->rx_sc->sci); + nxp_c45_rx_sc_update(phydev, phy_secy); + + return 0; +} + +static int nxp_c45_mdo_upd_rxsc(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + + phydev_dbg(phydev, "update RX SC SCI %016llx %s\n", + sci_to_cpu(ctx->rx_sc->sci), + ctx->rx_sc->active ? "enabled" : "disabled"); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_rx_sc_update(phydev, phy_secy); + + return 0; +} + +static int nxp_c45_mdo_del_rxsc(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + + phydev_dbg(phydev, "delete RX SC SCI %016llx %s\n", + sci_to_cpu(ctx->rx_sc->sci), + ctx->rx_sc->active ? "enabled" : "disabled"); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_rx_sc_del(phydev, phy_secy); + phy_secy->rx_sc = NULL; + + return 0; +} + +static int nxp_c45_mdo_add_rxsa(struct macsec_context *ctx) +{ + struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phydev_dbg(phydev, "add RX SA %u %s to RX SC SCI %016llx\n", + an, rx_sa->active ? "enabled" : "disabled", + sci_to_cpu(rx_sa->sc->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_sa_alloc(&phy_secy->sa_list, rx_sa, RX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_sa_set_pn(phydev, sa, rx_sa->next_pn, + ctx->secy->replay_window); + nxp_c45_sa_set_key(ctx, sa->regs, rx_sa->key.salt.bytes, rx_sa->ssci); + nxp_c45_rx_sa_update(phydev, sa, rx_sa->active); + + return 0; +} + +static int nxp_c45_mdo_upd_rxsa(struct macsec_context *ctx) +{ + struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phydev_dbg(phydev, "update RX SA %u %s to RX SC SCI %016llx\n", + an, rx_sa->active ? "enabled" : "disabled", + sci_to_cpu(rx_sa->sc->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_find_sa(&phy_secy->sa_list, RX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + if (ctx->sa.update_pn) + nxp_c45_sa_set_pn(phydev, sa, rx_sa->next_pn, + ctx->secy->replay_window); + nxp_c45_rx_sa_update(phydev, sa, rx_sa->active); + + return 0; +} + +static int nxp_c45_mdo_del_rxsa(struct macsec_context *ctx) +{ + struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phydev_dbg(phydev, "delete RX SA %u %s to RX SC SCI %016llx\n", + an, rx_sa->active ? "enabled" : "disabled", + sci_to_cpu(rx_sa->sc->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_find_sa(&phy_secy->sa_list, RX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_rx_sa_update(phydev, sa, false); + nxp_c45_rx_sa_clear_stats(phydev, sa); + + nxp_c45_sa_free(sa); + + return 0; +} + +static int nxp_c45_mdo_add_txsa(struct macsec_context *ctx) +{ + struct macsec_tx_sa *tx_sa = ctx->sa.tx_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phydev_dbg(phydev, "add TX SA %u %s to TX SC %016llx\n", + an, ctx->sa.tx_sa->active ? "enabled" : "disabled", + sci_to_cpu(ctx->secy->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_sa_alloc(&phy_secy->sa_list, tx_sa, TX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_sa_set_pn(phydev, sa, tx_sa->next_pn, 0); + nxp_c45_sa_set_key(ctx, sa->regs, tx_sa->key.salt.bytes, tx_sa->ssci); + if (ctx->secy->tx_sc.encoding_sa == sa->an) + nxp_c45_tx_sa_update(phydev, sa, tx_sa->active); + + return 0; +} + +static int nxp_c45_mdo_upd_txsa(struct macsec_context *ctx) +{ + struct macsec_tx_sa *tx_sa = ctx->sa.tx_sa; + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phydev_dbg(phydev, "update TX SA %u %s to TX SC %016llx\n", + an, ctx->sa.tx_sa->active ? "enabled" : "disabled", + sci_to_cpu(ctx->secy->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_find_sa(&phy_secy->sa_list, TX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + if (ctx->sa.update_pn) + nxp_c45_sa_set_pn(phydev, sa, tx_sa->next_pn, 0); + if (ctx->secy->tx_sc.encoding_sa == sa->an) + nxp_c45_tx_sa_update(phydev, sa, tx_sa->active); + + return 0; +} + +static int nxp_c45_mdo_del_txsa(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phydev_dbg(phydev, "delete TX SA %u %s to TX SC %016llx\n", + an, ctx->sa.tx_sa->active ? "enabled" : "disabled", + sci_to_cpu(ctx->secy->sci)); + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_find_sa(&phy_secy->sa_list, TX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + nxp_c45_select_secy(phydev, phy_secy->secy_id); + if (ctx->secy->tx_sc.encoding_sa == sa->an) + nxp_c45_tx_sa_update(phydev, sa, false); + nxp_c45_tx_sa_clear_stats(phydev, sa); + + nxp_c45_sa_free(sa); + + return 0; +} + +static int nxp_c45_mdo_get_dev_stats(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct macsec_dev_stats *dev_stats; + struct nxp_c45_secy *phy_secy; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + dev_stats = ctx->stats.dev_stats; + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + nxp_c45_macsec_read32_64(phydev, MACSEC_OPUS, + &dev_stats->OutPktsUntagged); + nxp_c45_macsec_read32_64(phydev, MACSEC_OPTLS, + &dev_stats->OutPktsTooLong); + nxp_c45_macsec_read32_64(phydev, MACSEC_INPBTS, + &dev_stats->InPktsBadTag); + + if (phy_secy->secy->validate_frames == MACSEC_VALIDATE_STRICT) + nxp_c45_macsec_read32_64(phydev, MACSEC_INPWTS, + &dev_stats->InPktsNoTag); + else + nxp_c45_macsec_read32_64(phydev, MACSEC_INPWTS, + &dev_stats->InPktsUntagged); + + if (phy_secy->secy->validate_frames == MACSEC_VALIDATE_STRICT) + nxp_c45_macsec_read32_64(phydev, MACSEC_IPSNFS, + &dev_stats->InPktsNoSCI); + else + nxp_c45_macsec_read32_64(phydev, MACSEC_IPSNFS, + &dev_stats->InPktsUnknownSCI); + + /* Always 0. */ + dev_stats->InPktsOverrun = 0; + + return 0; +} + +static int nxp_c45_mdo_get_tx_sc_stats(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct macsec_tx_sa_stats tx_sa_stats; + struct macsec_tx_sc_stats *stats; + struct nxp_c45_secy *phy_secy; + struct nxp_c45_sa *pos, *tmp; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + stats = ctx->stats.tx_sc_stats; + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + nxp_c45_macsec_read64(phydev, MACSEC_OOE1HS, + &stats->OutOctetsEncrypted); + nxp_c45_macsec_read64(phydev, MACSEC_OOP1HS, + &stats->OutOctetsProtected); + list_for_each_entry_safe(pos, tmp, &phy_secy->sa_list, list) { + if (pos->type != TX_SA) + continue; + + memset(&tx_sa_stats, 0, sizeof(tx_sa_stats)); + nxp_c45_tx_sa_read_stats(phydev, pos, &tx_sa_stats); + + stats->OutPktsEncrypted += tx_sa_stats.OutPktsEncrypted; + stats->OutPktsProtected += tx_sa_stats.OutPktsProtected; + } + + return 0; +} + +static int nxp_c45_mdo_get_tx_sa_stats(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct macsec_tx_sa_stats *stats; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_find_sa(&phy_secy->sa_list, TX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + stats = ctx->stats.tx_sa_stats; + nxp_c45_select_secy(phydev, phy_secy->secy_id); + nxp_c45_tx_sa_read_stats(phydev, sa, stats); + + return 0; +} + +static int nxp_c45_mdo_get_rx_sc_stats(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct macsec_rx_sa_stats rx_sa_stats; + struct macsec_rx_sc_stats *stats; + struct nxp_c45_secy *phy_secy; + struct nxp_c45_sa *pos, *tmp; + u32 reg = 0; + int i; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + if (phy_secy->rx_sc != ctx->rx_sc) + return -EINVAL; + + stats = ctx->stats.rx_sc_stats; + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + list_for_each_entry_safe(pos, tmp, &phy_secy->sa_list, list) { + if (pos->type != RX_SA) + continue; + + memset(&rx_sa_stats, 0, sizeof(rx_sa_stats)); + nxp_c45_rx_sa_read_stats(phydev, pos, &rx_sa_stats); + + stats->InPktsInvalid += rx_sa_stats.InPktsInvalid; + stats->InPktsNotValid += rx_sa_stats.InPktsNotValid; + stats->InPktsOK += rx_sa_stats.InPktsOK; + } + + for (i = 0; i < MACSEC_NUM_AN; i++) { + nxp_c45_macsec_read(phydev, MACSEC_RXAN0INUSS + i * 4, ®); + stats->InPktsNotUsingSA += reg; + nxp_c45_macsec_read(phydev, MACSEC_RXAN0IPUSS + i * 4, ®); + stats->InPktsUnusedSA += reg; + } + + nxp_c45_macsec_read64(phydev, MACSEC_INOD1HS, + &stats->InOctetsDecrypted); + nxp_c45_macsec_read64(phydev, MACSEC_INOV1HS, + &stats->InOctetsValidated); + + nxp_c45_macsec_read32_64(phydev, MACSEC_RXSCIPDS, + &stats->InPktsDelayed); + nxp_c45_macsec_read32_64(phydev, MACSEC_RXSCIPLS, + &stats->InPktsLate); + nxp_c45_macsec_read32_64(phydev, MACSEC_RXSCIPUS, + &stats->InPktsUnchecked); + + return 0; +} + +static int nxp_c45_mdo_get_rx_sa_stats(struct macsec_context *ctx) +{ + struct phy_device *phydev = ctx->phydev; + struct nxp_c45_phy *priv = phydev->priv; + struct macsec_rx_sa_stats *stats; + struct nxp_c45_secy *phy_secy; + u8 an = ctx->sa.assoc_num; + struct nxp_c45_sa *sa; + + phy_secy = nxp_c45_find_secy(&priv->macsec->secy_list, ctx->secy->sci); + if (IS_ERR(phy_secy)) + return PTR_ERR(phy_secy); + + sa = nxp_c45_find_sa(&phy_secy->sa_list, RX_SA, an); + if (IS_ERR(sa)) + return PTR_ERR(sa); + + stats = ctx->stats.rx_sa_stats; + nxp_c45_select_secy(phydev, phy_secy->secy_id); + + nxp_c45_rx_sa_read_stats(phydev, sa, stats); + nxp_c45_macsec_read(phydev, MACSEC_RXAN0INUSS + an * 4, + &stats->InPktsNotUsingSA); + nxp_c45_macsec_read(phydev, MACSEC_RXAN0IPUSS + an * 4, + &stats->InPktsUnusedSA); + + return 0; +} + +struct tja11xx_tlv_header { + struct ethhdr eth; + u8 subtype; + u8 len; + u8 payload[28]; +}; + +static int nxp_c45_mdo_insert_tx_tag(struct phy_device *phydev, + struct sk_buff *skb) +{ + struct tja11xx_tlv_header *tlv; + struct ethhdr *eth; + + eth = eth_hdr(skb); + tlv = skb_push(skb, TJA11XX_TLV_TX_NEEDED_HEADROOM); + memmove(tlv, eth, sizeof(*eth)); + skb_reset_mac_header(skb); + tlv->eth.h_proto = htons(ETH_P_TJA11XX_TLV); + tlv->subtype = 1; + tlv->len = sizeof(tlv->payload); + memset(tlv->payload, 0, sizeof(tlv->payload)); + + return 0; +} + +static const struct macsec_ops nxp_c45_macsec_ops = { + .mdo_dev_open = nxp_c45_mdo_dev_open, + .mdo_dev_stop = nxp_c45_mdo_dev_stop, + .mdo_add_secy = nxp_c45_mdo_add_secy, + .mdo_upd_secy = nxp_c45_mdo_upd_secy, + .mdo_del_secy = nxp_c45_mdo_del_secy, + .mdo_add_rxsc = nxp_c45_mdo_add_rxsc, + .mdo_upd_rxsc = nxp_c45_mdo_upd_rxsc, + .mdo_del_rxsc = nxp_c45_mdo_del_rxsc, + .mdo_add_rxsa = nxp_c45_mdo_add_rxsa, + .mdo_upd_rxsa = nxp_c45_mdo_upd_rxsa, + .mdo_del_rxsa = nxp_c45_mdo_del_rxsa, + .mdo_add_txsa = nxp_c45_mdo_add_txsa, + .mdo_upd_txsa = nxp_c45_mdo_upd_txsa, + .mdo_del_txsa = nxp_c45_mdo_del_txsa, + .mdo_get_dev_stats = nxp_c45_mdo_get_dev_stats, + .mdo_get_tx_sc_stats = nxp_c45_mdo_get_tx_sc_stats, + .mdo_get_tx_sa_stats = nxp_c45_mdo_get_tx_sa_stats, + .mdo_get_rx_sc_stats = nxp_c45_mdo_get_rx_sc_stats, + .mdo_get_rx_sa_stats = nxp_c45_mdo_get_rx_sa_stats, + .mdo_insert_tx_tag = nxp_c45_mdo_insert_tx_tag, + .needed_headroom = TJA11XX_TLV_TX_NEEDED_HEADROOM, + .needed_tailroom = TJA11XX_TLV_NEEDED_TAILROOM, +}; + +int nxp_c45_macsec_config_init(struct phy_device *phydev) +{ + struct nxp_c45_phy *priv = phydev->priv; + int ret; + + if (!priv->macsec) + return 0; + + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_FUNC_ENABLES, + MACSEC_EN | ADAPTER_EN); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, ADPTR_CNTRL, ADPTR_CNTRL_CONFIG_EN | + ADPTR_CNTRL_ADPTR_EN); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, ADPTR_TX_TAG_CNTRL, + ADPTR_TX_TAG_CNTRL_ENA); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, ADPTR_CNTRL, ADPTR_CNTRL_ADPTR_EN); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, MACSEC_TPNET, PN_WRAP_THRESHOLD); + if (ret) + return ret; + + /* Set MKA filter. */ + ret = nxp_c45_macsec_write(phydev, MACSEC_UPFR0D2, ETH_P_PAE); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, MACSEC_UPFR0M1, MACSEC_OVP); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, MACSEC_UPFR0M2, ETYPE_MASK); + if (ret) + return ret; + + ret = nxp_c45_macsec_write(phydev, MACSEC_UPFR0R, MACSEC_UPFR_EN); + + return ret; +} + +int nxp_c45_macsec_probe(struct phy_device *phydev) +{ + struct nxp_c45_phy *priv = phydev->priv; + struct device *dev = &phydev->mdio.dev; + + priv->macsec = devm_kzalloc(dev, sizeof(*priv->macsec), GFP_KERNEL); + if (!priv->macsec) + return -ENOMEM; + + INIT_LIST_HEAD(&priv->macsec->secy_list); + phydev->macsec_ops = &nxp_c45_macsec_ops; + + return 0; +} + +void nxp_c45_macsec_remove(struct phy_device *phydev) +{ + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *secy_p, *secy_t; + struct nxp_c45_sa *sa_p, *sa_t; + struct list_head *secy_list; + + if (!priv->macsec) + return; + + secy_list = &priv->macsec->secy_list; + nxp_c45_macsec_en(phydev, false); + + list_for_each_entry_safe(secy_p, secy_t, secy_list, list) { + list_for_each_entry_safe(sa_p, sa_t, &secy_p->sa_list, list) + nxp_c45_sa_free(sa_p); + nxp_c45_secy_free(secy_p); + } +} + +void nxp_c45_handle_macsec_interrupt(struct phy_device *phydev, + irqreturn_t *ret) +{ + struct nxp_c45_phy *priv = phydev->priv; + struct nxp_c45_secy *secy; + struct nxp_c45_sa *sa; + u8 encoding_sa; + int secy_id; + u32 reg = 0; + + if (!priv->macsec) + return; + + do { + nxp_c45_macsec_read(phydev, MACSEC_EVR, ®); + if (!reg) + return; + + secy_id = MACSEC_REG_SIZE - ffs(reg); + secy = nxp_c45_find_secy_by_id(&priv->macsec->secy_list, + secy_id); + if (IS_ERR(secy)) { + WARN_ON(1); + goto macsec_ack_irq; + } + + encoding_sa = secy->secy->tx_sc.encoding_sa; + phydev_dbg(phydev, "pn_wrapped: TX SC %d, encoding_sa %u\n", + secy->secy_id, encoding_sa); + + sa = nxp_c45_find_sa(&secy->sa_list, TX_SA, encoding_sa); + if (!IS_ERR(sa)) + macsec_pn_wrapped(secy->secy, sa->sa); + else + WARN_ON(1); + +macsec_ack_irq: + nxp_c45_macsec_write(phydev, MACSEC_EVR, + TX_SC_BIT(secy_id)); + *ret = IRQ_HANDLED; + } while (reg); +} diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 780ad353cf55..3cf614b4cd52 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* NXP C45 PHY driver - * Copyright (C) 2021 NXP + * Copyright 2021-2023 NXP * Author: Radu Pirea <radu-nicolae.pirea@oss.nxp.com> */ @@ -14,9 +14,10 @@ #include <linux/processor.h> #include <linux/property.h> #include <linux/ptp_classify.h> -#include <linux/ptp_clock_kernel.h> #include <linux/net_tstamp.h> +#include "nxp-c45-tja11xx.h" + #define PHY_ID_TJA_1103 0x001BB010 #define PHY_ID_TJA_1120 0x001BB031 @@ -75,9 +76,11 @@ #define PORT_CONTROL_EN BIT(14) #define VEND1_PORT_ABILITIES 0x8046 +#define MACSEC_ABILITY BIT(5) #define PTP_ABILITY BIT(3) #define VEND1_PORT_FUNC_IRQ_EN 0x807A +#define MACSEC_IRQS BIT(5) #define PTP_IRQS BIT(3) #define VEND1_PTP_IRQ_ACK 0x9008 @@ -148,7 +151,6 @@ #define TS_SEC_MASK GENMASK(1, 0) -#define VEND1_PORT_FUNC_ENABLES 0x8048 #define PTP_ENABLE BIT(3) #define PHY_TEST_ENABLE BIT(0) @@ -281,25 +283,6 @@ struct nxp_c45_phy_data { irqreturn_t *irq_status); }; -struct nxp_c45_phy { - const struct nxp_c45_phy_data *phy_data; - struct phy_device *phydev; - struct mii_timestamper mii_ts; - struct ptp_clock *ptp_clock; - struct ptp_clock_info caps; - struct sk_buff_head tx_queue; - struct sk_buff_head rx_queue; - /* used to access the PTP registers atomic */ - struct mutex ptp_lock; - int hwts_tx; - int hwts_rx; - u32 tx_delay; - u32 rx_delay; - struct timespec64 extts_ts; - int extts_index; - bool extts; -}; - static const struct nxp_c45_phy_data *nxp_c45_get_data(struct phy_device *phydev) { @@ -1215,12 +1198,25 @@ static int nxp_c45_start_op(struct phy_device *phydev) static int nxp_c45_config_intr(struct phy_device *phydev) { - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + int ret; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PORT_FUNC_IRQ_EN, MACSEC_IRQS); + if (ret) + return ret; + return phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_IRQ_EN, PHY_IRQ_LINK_EVENT); - else - return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, - VEND1_PHY_IRQ_EN, PHY_IRQ_LINK_EVENT); + } + + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PORT_FUNC_IRQ_EN, MACSEC_IRQS); + if (ret) + return ret; + + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PHY_IRQ_EN, PHY_IRQ_LINK_EVENT); } static int tja1103_config_intr(struct phy_device *phydev) @@ -1286,6 +1282,7 @@ static irqreturn_t nxp_c45_handle_interrupt(struct phy_device *phydev) } data->nmi_handler(phydev, &ret); + nxp_c45_handle_macsec_interrupt(phydev, &ret); return ret; } @@ -1611,6 +1608,9 @@ static int nxp_c45_config_init(struct phy_device *phydev) nxp_c45_counters_enable(phydev); nxp_c45_ptp_init(phydev); + ret = nxp_c45_macsec_config_init(phydev); + if (ret) + return ret; return nxp_c45_start_op(phydev); } @@ -1626,7 +1626,9 @@ static int nxp_c45_get_features(struct phy_device *phydev) static int nxp_c45_probe(struct phy_device *phydev) { struct nxp_c45_phy *priv; - int ptp_ability; + bool macsec_ability; + int phy_abilities; + bool ptp_ability; int ret = 0; priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); @@ -1642,9 +1644,9 @@ static int nxp_c45_probe(struct phy_device *phydev) mutex_init(&priv->ptp_lock); - ptp_ability = phy_read_mmd(phydev, MDIO_MMD_VEND1, - VEND1_PORT_ABILITIES); - ptp_ability = !!(ptp_ability & PTP_ABILITY); + phy_abilities = phy_read_mmd(phydev, MDIO_MMD_VEND1, + VEND1_PORT_ABILITIES); + ptp_ability = !!(phy_abilities & PTP_ABILITY); if (!ptp_ability) { phydev_dbg(phydev, "the phy does not support PTP"); goto no_ptp_support; @@ -1663,6 +1665,20 @@ static int nxp_c45_probe(struct phy_device *phydev) } no_ptp_support: + macsec_ability = !!(phy_abilities & MACSEC_ABILITY); + if (!macsec_ability) { + phydev_info(phydev, "the phy does not support MACsec\n"); + goto no_macsec_support; + } + + if (IS_ENABLED(CONFIG_MACSEC)) { + ret = nxp_c45_macsec_probe(phydev); + phydev_dbg(phydev, "MACsec support enabled."); + } else { + phydev_dbg(phydev, "MACsec support not enabled even if the phy supports it"); + } + +no_macsec_support: return ret; } @@ -1676,6 +1692,7 @@ static void nxp_c45_remove(struct phy_device *phydev) skb_queue_purge(&priv->tx_queue); skb_queue_purge(&priv->rx_queue); + nxp_c45_macsec_remove(phydev); } static void tja1103_counters_enable(struct phy_device *phydev) diff --git a/drivers/net/phy/nxp-c45-tja11xx.h b/drivers/net/phy/nxp-c45-tja11xx.h new file mode 100644 index 000000000000..f364fca68f0b --- /dev/null +++ b/drivers/net/phy/nxp-c45-tja11xx.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* NXP C45 PHY driver header file + * Copyright 2023 NXP + * Author: Radu Pirea <radu-nicolae.pirea@oss.nxp.com> + */ + +#include <linux/ptp_clock_kernel.h> + +#define VEND1_PORT_FUNC_ENABLES 0x8048 + +struct nxp_c45_macsec; + +struct nxp_c45_phy { + const struct nxp_c45_phy_data *phy_data; + struct phy_device *phydev; + struct mii_timestamper mii_ts; + struct ptp_clock *ptp_clock; + struct ptp_clock_info caps; + struct sk_buff_head tx_queue; + struct sk_buff_head rx_queue; + /* used to access the PTP registers atomic */ + struct mutex ptp_lock; + int hwts_tx; + int hwts_rx; + u32 tx_delay; + u32 rx_delay; + struct timespec64 extts_ts; + int extts_index; + bool extts; + struct nxp_c45_macsec *macsec; +}; + +#if IS_ENABLED(CONFIG_MACSEC) +int nxp_c45_macsec_config_init(struct phy_device *phydev); +int nxp_c45_macsec_probe(struct phy_device *phydev); +void nxp_c45_macsec_remove(struct phy_device *phydev); +void nxp_c45_handle_macsec_interrupt(struct phy_device *phydev, + irqreturn_t *ret); +#else +static inline +int nxp_c45_macsec_config_init(struct phy_device *phydev) +{ + return 0; +} + +static inline +int nxp_c45_macsec_probe(struct phy_device *phydev) +{ + return 0; +} + +static inline +void nxp_c45_macsec_remove(struct phy_device *phydev) +{ +} + +static inline +void nxp_c45_handle_macsec_interrupt(struct phy_device *phydev, + irqreturn_t *ret) +{ +} +#endif diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0c52a9eff188..3611ea64875e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1550,7 +1550,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, goto error; phy_resume(phydev); - phy_led_triggers_register(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by @@ -1822,7 +1823,8 @@ void phy_detach(struct phy_device *phydev) } phydev->phylink = NULL; - phy_led_triggers_unregister(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 298dfd6982a5..ed0b4ccaa6a6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1074,6 +1074,72 @@ static void phylink_pcs_an_restart(struct phylink *pl) pl->pcs->ops->pcs_an_restart(pl->pcs); } +/** + * phylink_pcs_neg_mode() - helper to determine PCS inband mode + * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @interface: interface mode to be used + * @advertising: adertisement ethtool link mode mask + * + * Determines the negotiation mode to be used by the PCS, and returns + * one of: + * + * - %PHYLINK_PCS_NEG_NONE: interface mode does not support inband + * - %PHYLINK_PCS_NEG_OUTBAND: an out of band mode (e.g. reading the PHY) + * will be used. + * - %PHYLINK_PCS_NEG_INBAND_DISABLED: inband mode selected but autoneg + * disabled + * - %PHYLINK_PCS_NEG_INBAND_ENABLED: inband mode selected and autoneg enabled + * + * Note: this is for cases where the PCS itself is involved in negotiation + * (e.g. Clause 37, SGMII and similar) not Clause 73. + */ +static unsigned int phylink_pcs_neg_mode(unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising) +{ + unsigned int neg_mode; + + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_QUSGMII: + case PHY_INTERFACE_MODE_USXGMII: + /* These protocols are designed for use with a PHY which + * communicates its negotiation result back to the MAC via + * inband communication. Note: there exist PHYs that run + * with SGMII but do not send the inband data. + */ + if (!phylink_autoneg_inband(mode)) + neg_mode = PHYLINK_PCS_NEG_OUTBAND; + else + neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; + break; + + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + /* 1000base-X is designed for use media-side for Fibre + * connections, and thus the Autoneg bit needs to be + * taken into account. We also do this for 2500base-X + * as well, but drivers may not support this, so may + * need to override this. + */ + if (!phylink_autoneg_inband(mode)) + neg_mode = PHYLINK_PCS_NEG_OUTBAND; + else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + advertising)) + neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; + else + neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED; + break; + + default: + neg_mode = PHYLINK_PCS_NEG_NONE; + break; + } + + return neg_mode; +} + static void phylink_major_config(struct phylink *pl, bool restart, const struct phylink_link_state *state) { diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3780a96d2caa..f75c9eb3958e 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -274,6 +274,7 @@ struct sfp { struct sfp_eeprom_id id; unsigned int module_power_mW; unsigned int module_t_start_up; + unsigned int module_t_wait; unsigned int phy_t_retry; unsigned int rate_kbd; @@ -388,6 +389,12 @@ static void sfp_fixup_fs_10gt(struct sfp *sfp) { sfp_fixup_10gbaset_30m(sfp); sfp_fixup_rollball(sfp); + + /* The RollBall fixup is not enough for FS modules, the AQR chip inside + * them does not return 0xffff for PHY ID registers in all MMDs for the + * while initializing. They need a 4 second wait before accessing PHY. + */ + sfp->module_t_wait = msecs_to_jiffies(4000); } static void sfp_fixup_halny_gsfp(struct sfp *sfp) @@ -2329,6 +2336,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) mask |= SFP_F_RS1; sfp->module_t_start_up = T_START_UP; + sfp->module_t_wait = T_WAIT; sfp->phy_t_retry = T_PHY_RETRY; sfp->state_ignore_mask = 0; @@ -2566,9 +2574,10 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) /* We need to check the TX_FAULT state, which is not defined * while TX_DISABLE is asserted. The earliest we want to do - * anything (such as probe for a PHY) is 50ms. + * anything (such as probe for a PHY) is 50ms (or more on + * specific modules). */ - sfp_sm_next(sfp, SFP_S_WAIT, T_WAIT); + sfp_sm_next(sfp, SFP_S_WAIT, sfp->module_t_wait); break; case SFP_S_WAIT: @@ -2582,8 +2591,8 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) * deasserting. */ timeout = sfp->module_t_start_up; - if (timeout > T_WAIT) - timeout -= T_WAIT; + if (timeout > sfp->module_t_wait) + timeout -= sfp->module_t_wait; else timeout = 1; diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index fbaaa8c102a1..840da924708b 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -533,7 +533,7 @@ ppp_async_encode(struct asyncppp *ap) proto = get_unaligned_be16(data); /* - * LCP packets with code values between 1 (configure-reqest) + * LCP packets with code values between 1 (configure-request) * and 7 (code-reject) must be sent as though no options * had been negotiated. */ diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 764ea02ff911..16106e088c63 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3235,6 +3235,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_fdb_get = vxlan_fdb_get, .ndo_mdb_add = vxlan_mdb_add, .ndo_mdb_del = vxlan_mdb_del, + .ndo_mdb_del_bulk = vxlan_mdb_del_bulk, .ndo_mdb_dump = vxlan_mdb_dump, .ndo_mdb_get = vxlan_mdb_get, .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index eb4c580b5cee..60eb95a06d55 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -74,6 +74,14 @@ struct vxlan_mdb_config { u8 rt_protocol; }; +struct vxlan_mdb_flush_desc { + union vxlan_addr remote_ip; + __be32 src_vni; + __be32 remote_vni; + __be16 remote_port; + u8 rt_protocol; +}; + static const struct rhashtable_params vxlan_mdb_rht_params = { .head_offset = offsetof(struct vxlan_mdb_entry, rhnode), .key_offset = offsetof(struct vxlan_mdb_entry, key), @@ -1306,6 +1314,145 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], return err; } +static const struct nla_policy +vxlan_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), + [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY, + sizeof(struct in_addr), + sizeof(struct in6_addr)), + [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 }, + [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), + [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range), + [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT), +}; + +static int vxlan_mdb_flush_desc_init(struct vxlan_dev *vxlan, + struct vxlan_mdb_flush_desc *desc, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + int err; + + if (entry->ifindex && entry->ifindex != vxlan->dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port net device"); + return -EINVAL; + } + + if (entry->vid) { + NL_SET_ERR_MSG_MOD(extack, "VID must not be specified"); + return -EINVAL; + } + + if (!tb[MDBA_SET_ENTRY_ATTRS]) + return 0; + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, + tb[MDBA_SET_ENTRY_ATTRS], + vxlan_mdbe_attrs_del_bulk_pol, extack); + if (err) + return err; + + if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) { + u8 state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]); + + if ((state_mask & MDB_PERMANENT) && !(entry->state & MDB_PERMANENT)) { + NL_SET_ERR_MSG_MOD(extack, "Only permanent MDB entries are supported"); + return -EINVAL; + } + } + + if (mdbe_attrs[MDBE_ATTR_RTPROT]) + desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); + + if (mdbe_attrs[MDBE_ATTR_DST]) + vxlan_nla_get_addr(&desc->remote_ip, mdbe_attrs[MDBE_ATTR_DST]); + + if (mdbe_attrs[MDBE_ATTR_DST_PORT]) + desc->remote_port = + cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT])); + + if (mdbe_attrs[MDBE_ATTR_VNI]) + desc->remote_vni = + cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI])); + + if (mdbe_attrs[MDBE_ATTR_SRC_VNI]) + desc->src_vni = + cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI])); + + return 0; +} + +static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan, + struct vxlan_mdb_entry *mdb_entry, + const struct vxlan_mdb_flush_desc *desc) +{ + struct vxlan_mdb_remote *remote, *tmp; + + list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) { + struct vxlan_rdst *rd = rtnl_dereference(remote->rd); + __be32 remote_vni; + + if (desc->remote_ip.sa.sa_family && + !vxlan_addr_equal(&desc->remote_ip, &rd->remote_ip)) + continue; + + /* Encapsulation is performed with source VNI if remote VNI + * is not set. + */ + remote_vni = rd->remote_vni ? : mdb_entry->key.vni; + if (desc->remote_vni && desc->remote_vni != remote_vni) + continue; + + if (desc->remote_port && desc->remote_port != rd->remote_port) + continue; + + if (desc->rt_protocol && + desc->rt_protocol != remote->rt_protocol) + continue; + + vxlan_mdb_remote_del(vxlan, mdb_entry, remote); + } +} + +static void vxlan_mdb_flush(struct vxlan_dev *vxlan, + const struct vxlan_mdb_flush_desc *desc) +{ + struct vxlan_mdb_entry *mdb_entry; + struct hlist_node *tmp; + + /* The removal of an entry cannot trigger the removal of another entry + * since entries are always added to the head of the list. + */ + hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) { + if (desc->src_vni && desc->src_vni != mdb_entry->key.vni) + continue; + + vxlan_mdb_remotes_flush(vxlan, mdb_entry, desc); + /* Entry will only be removed if its remotes list is empty. */ + vxlan_mdb_entry_put(vxlan, mdb_entry); + } +} + +int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_mdb_flush_desc desc = {}; + int err; + + ASSERT_RTNL(); + + err = vxlan_mdb_flush_desc_init(vxlan, &desc, tb, extack); + if (err) + return err; + + vxlan_mdb_flush(vxlan, &desc); + + return 0; +} + static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), @@ -1575,29 +1722,6 @@ static void vxlan_mdb_check_empty(void *ptr, void *arg) WARN_ON_ONCE(1); } -static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan, - struct vxlan_mdb_entry *mdb_entry) -{ - struct vxlan_mdb_remote *remote, *tmp; - - list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) - vxlan_mdb_remote_del(vxlan, mdb_entry, remote); -} - -static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan) -{ - struct vxlan_mdb_entry *mdb_entry; - struct hlist_node *tmp; - - /* The removal of an entry cannot trigger the removal of another entry - * since entries are always added to the head of the list. - */ - hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) { - vxlan_mdb_remotes_flush(vxlan, mdb_entry); - vxlan_mdb_entry_put(vxlan, mdb_entry); - } -} - int vxlan_mdb_init(struct vxlan_dev *vxlan) { int err; @@ -1613,7 +1737,9 @@ int vxlan_mdb_init(struct vxlan_dev *vxlan) void vxlan_mdb_fini(struct vxlan_dev *vxlan) { - vxlan_mdb_entries_flush(vxlan); + struct vxlan_mdb_flush_desc desc = {}; + + vxlan_mdb_flush(vxlan, &desc); WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB); rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty, NULL); diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h index db679c380955..b35d96b78843 100644 --- a/drivers/net/vxlan/vxlan_private.h +++ b/drivers/net/vxlan/vxlan_private.h @@ -235,6 +235,8 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); +int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack); struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index ab0c72c55b2d..0f405ded1a7a 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -1384,7 +1384,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, * if it is true then one of the handlers took the page. */ - if (reclaim) { + if (reclaim && txq) { u16 sequence = le16_to_cpu(pkt->hdr.sequence); int index = SEQ_TO_INDEX(sequence); int cmd_index = iwl_txq_get_cmd_index(txq, index); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index f39c436f0b6d..fc64e1e7f5ee 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3092,7 +3092,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 i, r, j, rb_len = 0; - spin_lock(&rxq->lock); + spin_lock_bh(&rxq->lock); r = iwl_get_closed_rb_stts(trans, rxq); @@ -3116,7 +3116,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, *data = iwl_fw_error_next_data(*data); } - spin_unlock(&rxq->lock); + spin_unlock_bh(&rxq->lock); return rb_len; } diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 8bbb0e17229d..00230f106294 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -878,7 +878,7 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid) static void mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, - int len, bool more, u32 info) + int len, bool more, u32 info, bool allow_direct) { struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -890,7 +890,7 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size); } else { - mt76_put_page_pool_buf(data, true); + mt76_put_page_pool_buf(data, allow_direct); } if (more) @@ -910,6 +910,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) struct sk_buff *skb; unsigned char *data; bool check_ddone = false; + bool allow_direct = !mt76_queue_is_wed_rx(q); bool more; if (IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED) && @@ -950,7 +951,8 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) } if (q->rx_head) { - mt76_add_fragment(dev, q, data, len, more, info); + mt76_add_fragment(dev, q, data, len, more, info, + allow_direct); continue; } @@ -979,7 +981,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) continue; free_frag: - mt76_put_page_pool_buf(data, true); + mt76_put_page_pool_buf(data, allow_direct); } mt76_dma_rx_fill(dev, q, true); diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 6902e97719d1..11c80555d975 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -968,9 +968,12 @@ static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie) static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) { - /* Downstream devices need to be in D0 state before enabling PCI PM substates */ + /* + * Downstream devices need to be in D0 state before enabling PCI PM + * substates. + */ pci_set_power_state(pdev, PCI_D0); - pci_enable_link_state(pdev, PCIE_LINK_STATE_ALL); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); return 0; } diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index d45e7b8dc530..8b34ccff073a 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -80,13 +80,49 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_LPC, system_bus_quirk); +/* + * Some Loongson PCIe ports have hardware limitations on their Maximum Read + * Request Size. They can't handle anything larger than this. Sane + * firmware will set proper MRRS at boot, so we only need no_inc_mrrs for + * bridges. However, some MIPS Loongson firmware doesn't set MRRS properly, + * so we have to enforce maximum safe MRRS, which is 256 bytes. + */ +#ifdef CONFIG_MIPS +static void loongson_set_min_mrrs_quirk(struct pci_dev *pdev) +{ + struct pci_bus *bus = pdev->bus; + struct pci_dev *bridge; + static const struct pci_device_id bridge_devids[] = { + { PCI_VDEVICE(LOONGSON, DEV_LS2K_PCIE_PORT0) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT0) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT1) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT2) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT3) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT4) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT5) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT6) }, + { 0, }, + }; + + /* look for the matching bridge */ + while (!pci_is_root_bus(bus)) { + bridge = bus->self; + bus = bus->parent; + + if (pci_match_id(bridge_devids, bridge)) { + if (pcie_get_readrq(pdev) > 256) { + pci_info(pdev, "limiting MRRS to 256\n"); + pcie_set_readrq(pdev, 256); + } + break; + } + } +} +DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_set_min_mrrs_quirk); +#endif + static void loongson_mrrs_quirk(struct pci_dev *pdev) { - /* - * Some Loongson PCIe ports have h/w limitations of maximum read - * request size. They can't handle anything larger than this. So - * force this limit on any devices attached under these ports. - */ struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); bridge->no_inc_mrrs = 1; diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 94ba61fe1c44..0452cbc362ee 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -751,7 +751,7 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata) if (!(features & VMD_FEAT_BIOS_PM_QUIRK)) return 0; - pci_enable_link_state(pdev, PCIE_LINK_STATE_ALL); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR); if (!pos) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 601129772b2d..5b1f271c6034 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -512,15 +512,12 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) if (pass && dev->subordinate) { check_hotplug_bridge(slot, dev); pcibios_resource_survey_bus(dev->subordinate); - if (pci_is_root_bus(bus)) - __pci_bus_size_bridges(dev->subordinate, &add_list); + __pci_bus_size_bridges(dev->subordinate, + &add_list); } } } - if (pci_is_root_bus(bus)) - __pci_bus_assign_resources(bus, &add_list, NULL); - else - pci_assign_unassigned_bridge_resources(bus->self); + __pci_bus_assign_resources(bus, &add_list, NULL); } acpiphp_sanitize_bus(bus); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 50b04ae5c394..5dab531c8654 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1041,7 +1041,7 @@ static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev) return bridge->link_state; } -static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) +static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1060,7 +1060,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) return -EPERM; } - if (sem) + if (!locked) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); if (state & PCIE_LINK_STATE_L0S) @@ -1082,7 +1082,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) link->clkpm_disable = 1; pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); - if (sem) + if (!locked) up_read(&pci_bus_sem); return 0; @@ -1090,7 +1090,9 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { - return __pci_disable_link_state(pdev, state, false); + lockdep_assert_held_read(&pci_bus_sem); + + return __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state_locked); @@ -1105,21 +1107,11 @@ EXPORT_SYMBOL(pci_disable_link_state_locked); */ int pci_disable_link_state(struct pci_dev *pdev, int state) { - return __pci_disable_link_state(pdev, state, true); + return __pci_disable_link_state(pdev, state, false); } EXPORT_SYMBOL(pci_disable_link_state); -/** - * pci_enable_link_state - Clear and set the default device link state so that - * the link may be allowed to enter the specified states. Note that if the - * BIOS didn't grant ASPM control to the OS, this does nothing because we can't - * touch the LNKCTL register. Also note that this does not enable states - * disabled by pci_disable_link_state(). Return 0 or a negative errno. - * - * @pdev: PCI device - * @state: Mask of ASPM link states to enable - */ -int pci_enable_link_state(struct pci_dev *pdev, int state) +static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1136,7 +1128,8 @@ int pci_enable_link_state(struct pci_dev *pdev, int state) return -EPERM; } - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link->aspm_default = 0; if (state & PCIE_LINK_STATE_L0S) @@ -1157,12 +1150,48 @@ int pci_enable_link_state(struct pci_dev *pdev, int state) link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0; pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); return 0; } + +/** + * pci_enable_link_state - Clear and set the default device link state so that + * the link may be allowed to enter the specified states. Note that if the + * BIOS didn't grant ASPM control to the OS, this does nothing because we can't + * touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + */ +int pci_enable_link_state(struct pci_dev *pdev, int state) +{ + return __pci_enable_link_state(pdev, state, false); +} EXPORT_SYMBOL(pci_enable_link_state); +/** + * pci_enable_link_state_locked - Clear and set the default device link state + * so that the link may be allowed to enter the specified states. Note that if + * the BIOS didn't grant ASPM control to the OS, this does nothing because we + * can't touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + * + * Context: Caller holds pci_bus_sem read lock. + */ +int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ + lockdep_assert_held_read(&pci_bus_sem); + + return __pci_enable_link_state(pdev, state, true); +} +EXPORT_SYMBOL(pci_enable_link_state_locked); + static int pcie_aspm_set_policy(const char *val, const struct kernel_param *kp) { diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 014010d03588..847b0dc41293 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1816,7 +1816,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) idx = 0; while (cmn->dtc[j].counters[idx]) if (++idx == CMN_DT_NUM_COUNTERS) - goto free_dtms; + return -ENOSPC; } hw->dtc_idx[j] = idx; } diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c index f021ec5a70e5..553725e1269c 100644 --- a/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c +++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi-mt8183.c @@ -100,7 +100,7 @@ static void mtk_mipi_tx_pll_disable(struct clk_hw *hw) static long mtk_mipi_tx_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) { - return clamp_val(rate, 50000000, 1600000000); + return clamp_val(rate, 125000000, 1600000000); } static const struct clk_ops mtk_mipi_tx_pll_ops = { diff --git a/drivers/phy/sunplus/phy-sunplus-usb2.c b/drivers/phy/sunplus/phy-sunplus-usb2.c index 0efe74ac9c6a..637a5fbae6d9 100644 --- a/drivers/phy/sunplus/phy-sunplus-usb2.c +++ b/drivers/phy/sunplus/phy-sunplus-usb2.c @@ -275,7 +275,7 @@ static int sp_usb_phy_probe(struct platform_device *pdev) phy = devm_phy_create(&pdev->dev, NULL, &sp_uphy_ops); if (IS_ERR(phy)) { - ret = -PTR_ERR(phy); + ret = PTR_ERR(phy); return ret; } diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 555b323f45da..bc847d3879f7 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -64,6 +64,7 @@ struct phy_gmii_sel_priv { u32 num_ports; u32 reg_offset; u32 qsgmii_main_ports; + bool no_offset; }; static int phy_gmii_sel_mode(struct phy *phy, enum phy_mode mode, int submode) @@ -402,7 +403,8 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) priv->num_ports = size / sizeof(u32); if (!priv->num_ports) return -EINVAL; - priv->reg_offset = __be32_to_cpu(*offset); + if (!priv->no_offset) + priv->reg_offset = __be32_to_cpu(*offset); } if_phys = devm_kcalloc(dev, priv->num_ports, @@ -471,6 +473,7 @@ static int phy_gmii_sel_probe(struct platform_device *pdev) dev_err(dev, "Failed to get syscon %d\n", ret); return ret; } + priv->no_offset = true; } ret = phy_gmii_sel_init_ports(priv); diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index 6bbffb081053..b456370166b6 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -16,12 +16,17 @@ struct quirk_entry { u32 s2idle_bug_mmio; + bool spurious_8042; }; static struct quirk_entry quirk_s2idle_bug = { .s2idle_bug_mmio = 0xfed80380, }; +static struct quirk_entry quirk_spurious_8042 = { + .spurious_8042 = true, +}; + static const struct dmi_system_id fwbug_list[] = { { .ident = "L14 Gen2 AMD", @@ -193,6 +198,16 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15s-eq2xxx"), } }, + /* https://community.frame.work/t/tracking-framework-amd-ryzen-7040-series-lid-wakeup-behavior-feedback/39128 */ + { + .ident = "Framework Laptop 13 (Phoenix)", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Framework"), + DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"), + DMI_MATCH(DMI_BIOS_VERSION, "03.03"), + } + }, {} }; @@ -235,6 +250,9 @@ void amd_pmc_quirks_init(struct amd_pmc_dev *dev) { const struct dmi_system_id *dmi_id; + if (dev->cpu_id == AMD_CPU_ID_CZN) + dev->disable_8042_wakeup = true; + dmi_id = dmi_first_match(fwbug_list); if (!dmi_id) return; @@ -242,4 +260,6 @@ void amd_pmc_quirks_init(struct amd_pmc_dev *dev) if (dev->quirks->s2idle_bug_mmio) pr_info("Using s2idle quirk to avoid %s platform firmware bug\n", dmi_id->ident); + if (dev->quirks->spurious_8042) + dev->disable_8042_wakeup = true; } diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index c3104714b480..864c8cc2f8a3 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -91,16 +91,6 @@ #define SMU_MSG_LOG_RESET 0x07 #define SMU_MSG_LOG_DUMP_DATA 0x08 #define SMU_MSG_GET_SUP_CONSTRAINTS 0x09 -/* List of supported CPU ids */ -#define AMD_CPU_ID_RV 0x15D0 -#define AMD_CPU_ID_RN 0x1630 -#define AMD_CPU_ID_PCO AMD_CPU_ID_RV -#define AMD_CPU_ID_CZN AMD_CPU_ID_RN -#define AMD_CPU_ID_YC 0x14B5 -#define AMD_CPU_ID_CB 0x14D8 -#define AMD_CPU_ID_PS 0x14E8 -#define AMD_CPU_ID_SP 0x14A4 -#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 #define PMC_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 @@ -766,19 +756,22 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) return -EINVAL; } -static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev) +static int amd_pmc_wa_irq1(struct amd_pmc_dev *pdev) { struct device *d; int rc; - if (!pdev->major) { - rc = amd_pmc_get_smu_version(pdev); - if (rc) - return rc; - } + /* cezanne platform firmware has a fix in 64.66.0 */ + if (pdev->cpu_id == AMD_CPU_ID_CZN) { + if (!pdev->major) { + rc = amd_pmc_get_smu_version(pdev); + if (rc) + return rc; + } - if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65)) - return 0; + if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65)) + return 0; + } d = bus_find_device_by_name(&serio_bus, NULL, "serio0"); if (!d) @@ -937,8 +930,8 @@ static int amd_pmc_suspend_handler(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); - if (pdev->cpu_id == AMD_CPU_ID_CZN && !disable_workarounds) { - int rc = amd_pmc_czn_wa_irq1(pdev); + if (pdev->disable_8042_wakeup && !disable_workarounds) { + int rc = amd_pmc_wa_irq1(pdev); if (rc) { dev_err(pdev->dev, "failed to adjust keyboard wakeup: %d\n", rc); diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h index c27bd6a5642f..b4794f118739 100644 --- a/drivers/platform/x86/amd/pmc/pmc.h +++ b/drivers/platform/x86/amd/pmc/pmc.h @@ -36,9 +36,21 @@ struct amd_pmc_dev { struct mutex lock; /* generic mutex lock */ struct dentry *dbgfs_dir; struct quirk_entry *quirks; + bool disable_8042_wakeup; }; void amd_pmc_process_restore_quirks(struct amd_pmc_dev *dev); void amd_pmc_quirks_init(struct amd_pmc_dev *dev); +/* List of supported CPU ids */ +#define AMD_CPU_ID_RV 0x15D0 +#define AMD_CPU_ID_RN 0x1630 +#define AMD_CPU_ID_PCO AMD_CPU_ID_RV +#define AMD_CPU_ID_CZN AMD_CPU_ID_RN +#define AMD_CPU_ID_YC 0x14B5 +#define AMD_CPU_ID_CB 0x14D8 +#define AMD_CPU_ID_PS 0x14E8 +#define AMD_CPU_ID_SP 0x14A4 +#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 + #endif /* PMC_H */ diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 84c175b9721a..e95d3011b999 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -472,7 +472,7 @@ int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value) * is based on the contiguous indexes from ltr_show output. * pmc index and ltr index needs to be calculated from it. */ - for (pmc_index = 0; pmc_index < ARRAY_SIZE(pmcdev->pmcs) && ltr_index > 0; pmc_index++) { + for (pmc_index = 0; pmc_index < ARRAY_SIZE(pmcdev->pmcs) && ltr_index >= 0; pmc_index++) { pmc = pmcdev->pmcs[pmc_index]; if (!pmc) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 6fa1735ad7a4..210b0a81b7ec 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -73,10 +73,10 @@ struct intel_vbtn_priv { bool wakeup_mode; }; -static void detect_tablet_mode(struct platform_device *device) +static void detect_tablet_mode(struct device *dev) { - struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev); - acpi_handle handle = ACPI_HANDLE(&device->dev); + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + acpi_handle handle = ACPI_HANDLE(dev); unsigned long long vgbs; acpi_status status; int m; @@ -89,6 +89,8 @@ static void detect_tablet_mode(struct platform_device *device) input_report_switch(priv->switches_dev, SW_TABLET_MODE, m); m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0; input_report_switch(priv->switches_dev, SW_DOCK, m); + + input_sync(priv->switches_dev); } /* @@ -134,7 +136,7 @@ static int intel_vbtn_input_setup(struct platform_device *device) priv->switches_dev->id.bustype = BUS_HOST; if (priv->has_switches) { - detect_tablet_mode(device); + detect_tablet_mode(&device->dev); ret = input_register_device(priv->switches_dev); if (ret) @@ -198,6 +200,9 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); + + /* Some devices need this to report further events */ + acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* @@ -352,7 +357,13 @@ static void intel_vbtn_pm_complete(struct device *dev) static int intel_vbtn_pm_resume(struct device *dev) { + struct intel_vbtn_priv *priv = dev_get_drvdata(dev); + intel_vbtn_pm_complete(dev); + + if (priv->has_switches) + detect_tablet_mode(dev); + return 0; } diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 4dfdbfca6841..c66808601fdd 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -590,6 +590,8 @@ static void ips_disable_gpu_turbo(struct ips_driver *ips) * @ips: IPS driver struct * * Check whether the MCP is over its thermal or power budget. + * + * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool mcp_exceeded(struct ips_driver *ips) { @@ -619,6 +621,8 @@ static bool mcp_exceeded(struct ips_driver *ips) * @cpu: CPU number to check * * Check a given CPU's average temp or power is over its limit. + * + * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool cpu_exceeded(struct ips_driver *ips, int cpu) { @@ -645,6 +649,8 @@ static bool cpu_exceeded(struct ips_driver *ips, int cpu) * @ips: IPS driver struct * * Check the MCH temp & power against their maximums. + * + * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool mch_exceeded(struct ips_driver *ips) { @@ -742,12 +748,13 @@ static void update_turbo_limits(struct ips_driver *ips) * - down (at TDP limit) * - adjust both CPU and GPU down if possible * - cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- -cpu < gpu < cpu+gpu+ cpu+ gpu+ nothing -cpu < gpu >= cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- -cpu >= gpu < cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- -cpu >= gpu >= cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- + * |cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- + * cpu < gpu < |cpu+gpu+ cpu+ gpu+ nothing + * cpu < gpu >= |cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- + * cpu >= gpu < |cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- + * cpu >= gpu >=|cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- * + * Returns: %0 */ static int ips_adjust(void *data) { @@ -935,11 +942,13 @@ static void monitor_timeout(struct timer_list *t) * @data: ips driver structure * * This is the main function for the IPS driver. It monitors power and - * tempurature in the MCP and adjusts CPU and GPU power clams accordingly. + * temperature in the MCP and adjusts CPU and GPU power clamps accordingly. * - * We keep a 5s moving average of power consumption and tempurature. Using + * We keep a 5s moving average of power consumption and temperature. Using * that data, along with CPU vs GPU preference, we adjust the power clamps * up or down. + * + * Returns: %0 on success or -errno on error */ static int ips_monitor(void *data) { @@ -1146,6 +1155,8 @@ static void dump_thermal_info(struct ips_driver *ips) * Handle temperature limit trigger events, generally by lowering the clamps. * If we're at a critical limit, we clamp back to the lowest possible value * to prevent emergency shutdown. + * + * Returns: IRQ_NONE or IRQ_HANDLED */ static irqreturn_t ips_irq_handler(int irq, void *arg) { @@ -1293,9 +1304,12 @@ static void ips_debugfs_init(struct ips_driver *ips) /** * ips_detect_cpu - detect whether CPU supports IPS + * @ips: IPS driver struct * * Walk our list and see if we're on a supported CPU. If we find one, * return the limits for it. + * + * Returns: the &ips_mcp_limits struct that matches the boot CPU or %NULL */ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) { @@ -1352,6 +1366,8 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) * monitor and control graphics turbo mode. If we can find them, we can * enable graphics turbo, otherwise we must disable it to avoid exceeding * thermal and power limits in the MCP. + * + * Returns: %true if the required symbols are found, else %false */ static bool ips_get_i915_syms(struct ips_driver *ips) { diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d0b5fd4137bc..c4895e9bc714 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -512,10 +512,10 @@ struct tpacpi_quirk { * Iterates over a quirks list until one is found that matches the * ThinkPad's vendor, BIOS and EC model. * - * Returns 0 if nothing matches, otherwise returns the quirks field of + * Returns: %0 if nothing matches, otherwise returns the quirks field of * the matching &struct tpacpi_quirk entry. * - * The match criteria is: vendor, ec and bios much match. + * The match criteria is: vendor, ec and bios must match. */ static unsigned long __init tpacpi_check_quirks( const struct tpacpi_quirk *qlist, @@ -7948,8 +7948,19 @@ static struct ibm_struct volume_driver_data = { * TPACPI_FAN_WR_TPEC is also available and should be used to * command the fan. The X31/X40/X41 seems to have 8 fan levels, * but the ACPI tables just mention level 7. + * + * TPACPI_FAN_RD_TPEC_NS: + * This mode is used for a few ThinkPads (L13 Yoga Gen2, X13 Yoga Gen2 etc.) + * that are using non-standard EC locations for reporting fan speeds. + * Currently these platforms only provide fan rpm reporting. + * */ +#define FAN_RPM_CAL_CONST 491520 /* FAN RPM calculation offset for some non-standard ECFW */ + +#define FAN_NS_CTRL_STATUS BIT(2) /* Bit which determines control is enabled or not */ +#define FAN_NS_CTRL BIT(4) /* Bit which determines control is by host or EC */ + enum { /* Fan control constants */ fan_status_offset = 0x2f, /* EC register 0x2f */ fan_rpm_offset = 0x84, /* EC register 0x84: LSB, 0x85 MSB (RPM) @@ -7957,6 +7968,11 @@ enum { /* Fan control constants */ fan_select_offset = 0x31, /* EC register 0x31 (Firmware 7M) bit 0 selects which fan is active */ + fan_status_offset_ns = 0x93, /* Special status/control offset for non-standard EC Fan1 */ + fan2_status_offset_ns = 0x96, /* Special status/control offset for non-standard EC Fan2 */ + fan_rpm_status_ns = 0x95, /* Special offset for Fan1 RPM status for non-standard EC */ + fan2_rpm_status_ns = 0x98, /* Special offset for Fan2 RPM status for non-standard EC */ + TP_EC_FAN_FULLSPEED = 0x40, /* EC fan mode: full speed */ TP_EC_FAN_AUTO = 0x80, /* EC fan mode: auto fan control */ @@ -7967,6 +7983,7 @@ enum fan_status_access_mode { TPACPI_FAN_NONE = 0, /* No fan status or control */ TPACPI_FAN_RD_ACPI_GFAN, /* Use ACPI GFAN */ TPACPI_FAN_RD_TPEC, /* Use ACPI EC regs 0x2f, 0x84-0x85 */ + TPACPI_FAN_RD_TPEC_NS, /* Use non-standard ACPI EC regs (eg: L13 Yoga gen2 etc.) */ }; enum fan_control_access_mode { @@ -7994,6 +8011,8 @@ static u8 fan_control_desired_level; static u8 fan_control_resume_level; static int fan_watchdog_maxinterval; +static bool fan_with_ns_addr; + static struct mutex fan_mutex; static void fan_watchdog_fire(struct work_struct *ignored); @@ -8123,6 +8142,15 @@ static int fan_get_status(u8 *status) } break; + case TPACPI_FAN_RD_TPEC_NS: + /* Default mode is AUTO which means controlled by EC */ + if (!acpi_ec_read(fan_status_offset_ns, &s)) + return -EIO; + + if (status) + *status = s; + + break; default: return -ENXIO; @@ -8139,7 +8167,8 @@ static int fan_get_status_safe(u8 *status) if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; rc = fan_get_status(&s); - if (!rc) + /* NS EC doesn't have register with level settings */ + if (!rc && !fan_with_ns_addr) fan_update_desired_level(s); mutex_unlock(&fan_mutex); @@ -8166,7 +8195,13 @@ static int fan_get_speed(unsigned int *speed) if (likely(speed)) *speed = (hi << 8) | lo; + break; + case TPACPI_FAN_RD_TPEC_NS: + if (!acpi_ec_read(fan_rpm_status_ns, &lo)) + return -EIO; + if (speed) + *speed = lo ? FAN_RPM_CAL_CONST / lo : 0; break; default: @@ -8178,7 +8213,7 @@ static int fan_get_speed(unsigned int *speed) static int fan2_get_speed(unsigned int *speed) { - u8 hi, lo; + u8 hi, lo, status; bool rc; switch (fan_status_access_mode) { @@ -8194,7 +8229,21 @@ static int fan2_get_speed(unsigned int *speed) if (likely(speed)) *speed = (hi << 8) | lo; + break; + case TPACPI_FAN_RD_TPEC_NS: + rc = !acpi_ec_read(fan2_status_offset_ns, &status); + if (rc) + return -EIO; + if (!(status & FAN_NS_CTRL_STATUS)) { + pr_info("secondary fan control not supported\n"); + return -EIO; + } + rc = !acpi_ec_read(fan2_rpm_status_ns, &lo); + if (rc) + return -EIO; + if (speed) + *speed = lo ? FAN_RPM_CAL_CONST / lo : 0; break; default: @@ -8697,6 +8746,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */ #define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ #define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ +#define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8715,6 +8765,8 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ TPACPI_Q_LNV3('N', '3', '7', TPACPI_FAN_2CTL), /* T15g (2nd gen) */ + TPACPI_Q_LNV3('R', '1', 'F', TPACPI_FAN_NS), /* L13 Yoga Gen 2 */ + TPACPI_Q_LNV3('N', '2', 'U', TPACPI_FAN_NS), /* X13 Yoga Gen 2*/ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ }; @@ -8749,18 +8801,27 @@ static int __init fan_init(struct ibm_init_struct *iibm) return -ENODEV; } + if (quirks & TPACPI_FAN_NS) { + pr_info("ECFW with non-standard fan reg control found\n"); + fan_with_ns_addr = 1; + /* Fan ctrl support from host is undefined for now */ + tp_features.fan_ctrl_status_undef = 1; + } + if (gfan_handle) { /* 570, 600e/x, 770e, 770x */ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; } else { /* all other ThinkPads: note that even old-style * ThinkPad ECs supports the fan control register */ - if (likely(acpi_ec_read(fan_status_offset, - &fan_control_initial_status))) { + if (fan_with_ns_addr || + likely(acpi_ec_read(fan_status_offset, &fan_control_initial_status))) { int res; unsigned int speed; - fan_status_access_mode = TPACPI_FAN_RD_TPEC; + fan_status_access_mode = fan_with_ns_addr ? + TPACPI_FAN_RD_TPEC_NS : TPACPI_FAN_RD_TPEC; + if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); /* Try and probe the 2nd fan */ @@ -8769,7 +8830,8 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (res >= 0 && speed != FAN_NOT_PRESENT) { /* It responded - so let's assume it's there */ tp_features.second_fan = 1; - tp_features.second_fan_ctl = 1; + /* fan control not currently available for ns ECFW */ + tp_features.second_fan_ctl = !fan_with_ns_addr; pr_info("secondary fan control detected & enabled\n"); } else { /* Fan not auto-detected */ @@ -8944,6 +9006,7 @@ static int fan_read(struct seq_file *m) str_enabled_disabled(status), status); break; + case TPACPI_FAN_RD_TPEC_NS: case TPACPI_FAN_RD_TPEC: /* all except 570, 600e/x, 770e, 770x */ rc = fan_get_status_safe(&status); @@ -8958,13 +9021,22 @@ static int fan_read(struct seq_file *m) seq_printf(m, "speed:\t\t%d\n", speed); - if (status & TP_EC_FAN_FULLSPEED) - /* Disengaged mode takes precedence */ - seq_printf(m, "level:\t\tdisengaged\n"); - else if (status & TP_EC_FAN_AUTO) - seq_printf(m, "level:\t\tauto\n"); - else - seq_printf(m, "level:\t\t%d\n", status); + if (fan_status_access_mode == TPACPI_FAN_RD_TPEC_NS) { + /* + * No full speed bit in NS EC + * EC Auto mode is set by default. + * No other levels settings available + */ + seq_printf(m, "level:\t\t%s\n", status & FAN_NS_CTRL ? "unknown" : "auto"); + } else { + if (status & TP_EC_FAN_FULLSPEED) + /* Disengaged mode takes precedence */ + seq_printf(m, "level:\t\tdisengaged\n"); + else if (status & TP_EC_FAN_AUTO) + seq_printf(m, "level:\t\tauto\n"); + else + seq_printf(m, "level:\t\t%d\n", status); + } break; case TPACPI_FAN_NONE: @@ -9303,7 +9375,7 @@ static struct tpacpi_battery_driver_data battery_info; /* ACPI helpers/functions/probes */ -/** +/* * This evaluates a ACPI method call specific to the battery * ACPI extension. The specifics are that an error is marked * in the 32rd bit of the response, so we just check that here. diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index b022af3d20fe..bf525ef32209 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1716,20 +1716,6 @@ ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r) return __ptp_ocp_get_mem(bp, start, r->size); } -static void -ptp_ocp_set_irq_resource(struct resource *res, int irq) -{ - struct resource r = DEFINE_RES_IRQ(irq); - *res = r; -} - -static void -ptp_ocp_set_mem_resource(struct resource *res, resource_size_t start, int size) -{ - struct resource r = DEFINE_RES_MEM(start, size); - *res = r; -} - static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r) { @@ -1741,15 +1727,15 @@ ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r) int id; start = pci_resource_start(pdev, 0) + r->offset; - ptp_ocp_set_mem_resource(&res[0], start, r->size); - ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + res[0] = DEFINE_RES_MEM(start, r->size); + res[1] = DEFINE_RES_IRQ(pci_irq_vector(pdev, r->irq_vec)); info = r->extra; id = pci_dev_id(pdev) << 1; id += info->pci_offset; p = platform_device_register_resndata(&pdev->dev, info->name, id, - res, 2, info->data, + res, ARRAY_SIZE(res), info->data, info->data_size); if (IS_ERR(p)) return PTR_ERR(p); @@ -1768,11 +1754,11 @@ ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id) info = r->extra; start = pci_resource_start(pdev, 0) + r->offset; - ptp_ocp_set_mem_resource(&res[0], start, r->size); - ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + res[0] = DEFINE_RES_MEM(start, r->size); + res[1] = DEFINE_RES_IRQ(pci_irq_vector(pdev, r->irq_vec)); return platform_device_register_resndata(&pdev->dev, info->name, - id, res, 2, + id, res, ARRAY_SIZE(res), info->data, info->data_size); } diff --git a/drivers/reset/core.c b/drivers/reset/core.c index 7ece6a8e9858..4d5a78d3c085 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -807,6 +807,9 @@ static void __reset_control_put_internal(struct reset_control *rstc) { lockdep_assert_held(&reset_list_mutex); + if (IS_ERR_OR_NULL(rstc)) + return; + kref_put(&rstc->refcnt, __reset_control_release); } @@ -1017,11 +1020,8 @@ EXPORT_SYMBOL_GPL(reset_control_put); void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs) { mutex_lock(&reset_list_mutex); - while (num_rstcs--) { - if (IS_ERR_OR_NULL(rstcs[num_rstcs].rstc)) - continue; + while (num_rstcs--) __reset_control_put_internal(rstcs[num_rstcs].rstc); - } mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_control_bulk_put); diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c index 8d1fce18ded7..5c3267acd2b1 100644 --- a/drivers/reset/hisilicon/hi6220_reset.c +++ b/drivers/reset/hisilicon/hi6220_reset.c @@ -163,7 +163,7 @@ static int hi6220_reset_probe(struct platform_device *pdev) if (!data) return -ENOMEM; - type = (enum hi6220_reset_ctrl_type)of_device_get_match_data(dev); + type = (uintptr_t)of_device_get_match_data(dev); regmap = syscon_node_to_regmap(np); if (IS_ERR(regmap)) { diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 3a9cc8a4a230..ade95e91b3c8 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -17,6 +17,7 @@ #include <linux/blk-mq.h> #include <linux/slab.h> #include <linux/list.h> +#include <linux/io.h> #include <asm/eadm.h> #include "scm_blk.h" @@ -130,7 +131,7 @@ static void scm_request_done(struct scm_request *scmrq) for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) { msb = &scmrq->aob->msb[i]; - aidaw = msb->data_addr; + aidaw = (u64)phys_to_virt(msb->data_addr); if ((msb->flags & MSB_FLAG_IDA) && aidaw && IS_ALIGNED(aidaw, PAGE_SIZE)) @@ -195,12 +196,12 @@ static int scm_request_prepare(struct scm_request *scmrq) msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9); msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE; msb->flags |= MSB_FLAG_IDA; - msb->data_addr = (u64) aidaw; + msb->data_addr = (u64)virt_to_phys(aidaw); rq_for_each_segment(bv, req, iter) { WARN_ON(bv.bv_offset); msb->blk_count += bv.bv_len >> 12; - aidaw->data_addr = (u64) page_address(bv.bv_page); + aidaw->data_addr = virt_to_phys(page_address(bv.bv_page)); aidaw++; } diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h index 70c5bbda0fea..047fa6101555 100644 --- a/drivers/s390/net/ism.h +++ b/drivers/s390/net/ism.h @@ -16,7 +16,6 @@ */ #define ISM_DMB_WORD_OFFSET 1 #define ISM_DMB_BIT_OFFSET (ISM_DMB_WORD_OFFSET * 32) -#define ISM_IDENT_MASK 0x00FFFF #define ISM_REG_SBA 0x1 #define ISM_REG_IEQ 0x2 @@ -192,12 +191,6 @@ struct ism_sba { #define ISM_CREATE_REQ(dmb, idx, sf, offset) \ ((dmb) | (idx) << 24 | (sf) << 23 | (offset)) -struct ism_systemeid { - u8 seid_string[24]; - u8 serial_number[4]; - u8 type[4]; -}; - static inline void __ism_read_cmd(struct ism_dev *ism, void *data, unsigned long offset, unsigned long len) { diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 81aabbfbbe2c..2c8e964425dc 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -36,6 +36,7 @@ static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */ /* a list for fast mapping */ static u8 max_client; static DEFINE_MUTEX(clients_lock); +static bool ism_v2_capable; struct ism_dev_list { struct list_head list; struct mutex mutex; /* protects ism device list */ @@ -443,32 +444,6 @@ int ism_move(struct ism_dev *ism, u64 dmb_tok, unsigned int idx, bool sf, } EXPORT_SYMBOL_GPL(ism_move); -static struct ism_systemeid SYSTEM_EID = { - .seid_string = "IBM-SYSZ-ISMSEID00000000", - .serial_number = "0000", - .type = "0000", -}; - -static void ism_create_system_eid(void) -{ - struct cpuid id; - u16 ident_tail; - char tmp[5]; - - get_cpu_id(&id); - ident_tail = (u16)(id.ident & ISM_IDENT_MASK); - snprintf(tmp, 5, "%04X", ident_tail); - memcpy(&SYSTEM_EID.serial_number, tmp, 4); - snprintf(tmp, 5, "%04X", id.machine); - memcpy(&SYSTEM_EID.type, tmp, 4); -} - -u8 *ism_get_seid(void) -{ - return SYSTEM_EID.seid_string; -} -EXPORT_SYMBOL_GPL(ism_get_seid); - static void ism_handle_event(struct ism_dev *ism) { struct ism_event *entry; @@ -560,7 +535,9 @@ static int ism_dev_init(struct ism_dev *ism) if (!ism_add_vlan_id(ism, ISM_RESERVED_VLANID)) /* hardware is V2 capable */ - ism_create_system_eid(); + ism_v2_capable = true; + else + ism_v2_capable = false; mutex_lock(&ism_dev_list.mutex); mutex_lock(&clients_lock); @@ -665,8 +642,7 @@ static void ism_dev_exit(struct ism_dev *ism) } mutex_unlock(&clients_lock); - if (SYSTEM_EID.serial_number[0] != '0' || - SYSTEM_EID.type[0] != '0') + if (ism_v2_capable) ism_del_vlan_id(ism, ISM_RESERVED_VLANID); unregister_ieq(ism); unregister_sba(ism); @@ -743,10 +719,10 @@ static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, return ism_cmd(ism, &cmd); } -static int smcd_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid, - u32 vid) +static int smcd_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid, + u32 vid_valid, u32 vid) { - return ism_query_rgid(smcd->priv, rgid, vid_valid, vid); + return ism_query_rgid(smcd->priv, rgid->gid, vid_valid, vid); } static int smcd_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb, @@ -797,10 +773,11 @@ static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, return ism_cmd(ism, &cmd); } -static int smcd_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq, - u32 event_code, u64 info) +static int smcd_signal_ieq(struct smcd_dev *smcd, struct smcd_gid *rgid, + u32 trigger_irq, u32 event_code, u64 info) { - return ism_signal_ieq(smcd->priv, rgid, trigger_irq, event_code, info); + return ism_signal_ieq(smcd->priv, rgid->gid, + trigger_irq, event_code, info); } static int smcd_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx, @@ -812,8 +789,7 @@ static int smcd_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx, static int smcd_supports_v2(void) { - return SYSTEM_EID.serial_number[0] != '0' || - SYSTEM_EID.type[0] != '0'; + return ism_v2_capable; } static u64 ism_get_local_gid(struct ism_dev *ism) @@ -821,9 +797,11 @@ static u64 ism_get_local_gid(struct ism_dev *ism) return ism->local_gid; } -static u64 smcd_get_local_gid(struct smcd_dev *smcd) +static void smcd_get_local_gid(struct smcd_dev *smcd, + struct smcd_gid *smcd_gid) { - return ism_get_local_gid(smcd->priv); + smcd_gid->gid = ism_get_local_gid(smcd->priv); + smcd_gid->gid_ext = 0; } static u16 ism_get_chid(struct ism_dev *ism) @@ -857,7 +835,6 @@ static const struct smcd_ops ism_ops = { .signal_event = smcd_signal_ieq, .move_data = smcd_move, .supports_v2 = smcd_supports_v2, - .get_system_eid = ism_get_seid, .get_local_gid = smcd_get_local_gid, .get_chid = smcd_get_chid, .get_dev = smcd_get_dev, diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 73b6ac0c01f5..7d5a155073c6 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1678,7 +1678,6 @@ struct aac_dev u32 handle_pci_error; bool init_reset; u8 soft_reset_support; - u8 use_map_queue; }; #define aac_adapter_interrupt(dev) \ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 013a9a334972..25cee03d7f97 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -223,12 +223,8 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) { struct fib *fibptr; - u32 blk_tag; - int i; - blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); - i = blk_mq_unique_tag_to_tag(blk_tag); - fibptr = &dev->fibs[i]; + fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag]; /* * Null out fields that depend on being zero at the start of * each I/O diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index c4a36c0be527..68f4dbcfff49 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -19,7 +19,6 @@ #include <linux/compat.h> #include <linux/blkdev.h> -#include <linux/blk-mq-pci.h> #include <linux/completion.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -505,15 +504,6 @@ common_config: return 0; } -static void aac_map_queues(struct Scsi_Host *shost) -{ - struct aac_dev *aac = (struct aac_dev *)shost->hostdata; - - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], - aac->pdev, 0); - aac->use_map_queue = true; -} - /** * aac_change_queue_depth - alter queue depths * @sdev: SCSI device we are considering @@ -1498,7 +1488,6 @@ static const struct scsi_host_template aac_driver_template = { .bios_param = aac_biosparm, .shost_groups = aac_host_groups, .slave_configure = aac_slave_configure, - .map_queues = aac_map_queues, .change_queue_depth = aac_change_queue_depth, .sdev_groups = aac_dev_groups, .eh_abort_handler = aac_eh_abort, @@ -1786,8 +1775,6 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = AAC_MAX_LUN; pci_set_drvdata(pdev, shost); - shost->nr_hw_queues = aac->max_msix; - shost->host_tagset = 1; error = scsi_add_host(shost, &pdev->dev); if (error) @@ -1919,7 +1906,6 @@ static void aac_remove_one(struct pci_dev *pdev) struct aac_dev *aac = (struct aac_dev *)shost->hostdata; aac_cancel_rescan_worker(aac); - aac->use_map_queue = false; scsi_remove_host(shost); __aac_shutdown(aac); diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 61949f374188..11ef58204e96 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -493,10 +493,6 @@ static int aac_src_deliver_message(struct fib *fib) #endif u16 vector_no; - struct scsi_cmnd *scmd; - u32 blk_tag; - struct Scsi_Host *shost = dev->scsi_host_ptr; - struct blk_mq_queue_map *qmap; atomic_inc(&q->numpending); @@ -509,25 +505,8 @@ static int aac_src_deliver_message(struct fib *fib) if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) && dev->sa_firmware) vector_no = aac_get_vector(dev); - else { - if (!fib->vector_no || !fib->callback_data) { - if (shost && dev->use_map_queue) { - qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; - vector_no = qmap->mq_map[raw_smp_processor_id()]; - } - /* - * We hardcode the vector_no for - * reserved commands as a valid shost is - * absent during the init - */ - else - vector_no = 0; - } else { - scmd = (struct scsi_cmnd *)fib->callback_data; - blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); - vector_no = blk_mq_unique_tag_to_hwq(blk_tag); - } - } + else + vector_no = fib->vector_no; if (native_hba) { if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) { diff --git a/drivers/soundwire/intel_ace2x.c b/drivers/soundwire/intel_ace2x.c index 82672fcbc2aa..8280baa3254b 100644 --- a/drivers/soundwire/intel_ace2x.c +++ b/drivers/soundwire/intel_ace2x.c @@ -23,8 +23,9 @@ static void intel_shim_vs_init(struct sdw_intel *sdw) { void __iomem *shim_vs = sdw->link_res->shim_vs; - u16 act = 0; + u16 act; + act = intel_readw(shim_vs, SDW_SHIM2_INTEL_VS_ACTMCTL); u16p_replace_bits(&act, 0x1, SDW_SHIM2_INTEL_VS_ACTMCTL_DOAIS); act |= SDW_SHIM2_INTEL_VS_ACTMCTL_DACTQE; act |= SDW_SHIM2_INTEL_VS_ACTMCTL_DODS; diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 69719b335bcb..f048b3d55b2e 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -742,14 +742,15 @@ error_1: * sdw_ml_sync_bank_switch: Multilink register bank switch * * @bus: SDW bus instance + * @multi_link: whether this is a multi-link stream with hardware-based sync * * Caller function should free the buffers on error */ -static int sdw_ml_sync_bank_switch(struct sdw_bus *bus) +static int sdw_ml_sync_bank_switch(struct sdw_bus *bus, bool multi_link) { unsigned long time_left; - if (!bus->multi_link) + if (!multi_link) return 0; /* Wait for completion of transfer */ @@ -847,7 +848,7 @@ static int do_bank_switch(struct sdw_stream_runtime *stream) bus->bank_switch_timeout = DEFAULT_BANK_SWITCH_TIMEOUT; /* Check if bank switch was successful */ - ret = sdw_ml_sync_bank_switch(bus); + ret = sdw_ml_sync_bank_switch(bus, multi_link); if (ret < 0) { dev_err(bus->dev, "multi link bank switch failed: %d\n", ret); diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 6aa8adbe4170..bad34998454a 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -22,6 +22,7 @@ #include <linux/gpio/consumer.h> #include <linux/pinctrl/consumer.h> #include <linux/pm_runtime.h> +#include <linux/iopoll.h> #include <trace/events/spi.h> /* SPI register offsets */ @@ -233,9 +234,6 @@ */ #define DMA_MIN_BYTES 16 -#define SPI_DMA_MIN_TIMEOUT (msecs_to_jiffies(1000)) -#define SPI_DMA_TIMEOUT_PER_10K (msecs_to_jiffies(4)) - #define AUTOSUSPEND_TIMEOUT 2000 struct atmel_spi_caps { @@ -279,6 +277,7 @@ struct atmel_spi { bool keep_cs; u32 fifo_size; + bool last_polarity; u8 native_cs_free; u8 native_cs_for_gpio; }; @@ -292,6 +291,22 @@ struct atmel_spi_device { #define INVALID_DMA_ADDRESS 0xffffffff /* + * This frequency can be anything supported by the controller, but to avoid + * unnecessary delay, the highest possible frequency is chosen. + * + * This frequency is the highest possible which is not interfering with other + * chip select registers (see Note for Serial Clock Bit Rate configuration in + * Atmel-11121F-ATARM-SAMA5D3-Series-Datasheet_02-Feb-16, page 1283) + */ +#define DUMMY_MSG_FREQUENCY 0x02 +/* + * 8 bits is the minimum data the controller is capable of sending. + * + * This message can be anything as it should not be treated by any SPI device. + */ +#define DUMMY_MSG 0xAA + +/* * Version 2 of the SPI controller has * - CR.LASTXFER * - SPI_MR.DIV32 may become FDIV or must-be-zero (here: always zero) @@ -305,6 +320,43 @@ static bool atmel_spi_is_v2(struct atmel_spi *as) } /* + * Send a dummy message. + * + * This is sometimes needed when using a CS GPIO to force clock transition when + * switching between devices with different polarities. + */ +static void atmel_spi_send_dummy(struct atmel_spi *as, struct spi_device *spi, int chip_select) +{ + u32 status; + u32 csr; + + /* + * Set a clock frequency to allow sending message on SPI bus. + * The frequency here can be anything, but is needed for + * the controller to send the data. + */ + csr = spi_readl(as, CSR0 + 4 * chip_select); + csr = SPI_BFINS(SCBR, DUMMY_MSG_FREQUENCY, csr); + spi_writel(as, CSR0 + 4 * chip_select, csr); + + /* + * Read all data coming from SPI bus, needed to be able to send + * the message. + */ + spi_readl(as, RDR); + while (spi_readl(as, SR) & SPI_BIT(RDRF)) { + spi_readl(as, RDR); + cpu_relax(); + } + + spi_writel(as, TDR, DUMMY_MSG); + + readl_poll_timeout_atomic(as->regs + SPI_SR, status, + (status & SPI_BIT(TXEMPTY)), 1, 1000); +} + + +/* * Earlier SPI controllers (e.g. on at91rm9200) have a design bug whereby * they assume that spi slave device state will not change on deselect, so * that automagic deselection is OK. ("NPCSx rises if no data is to be @@ -320,11 +372,17 @@ static bool atmel_spi_is_v2(struct atmel_spi *as) * Master on Chip Select 0.") No workaround exists for that ... so for * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH, * and (c) will trigger that first erratum in some cases. + * + * When changing the clock polarity, the SPI controller waits for the next + * transmission to enforce the default clock state. This may be an issue when + * using a GPIO as Chip Select: the clock level is applied only when the first + * packet is sent, once the CS has already been asserted. The workaround is to + * avoid this by sending a first (dummy) message before toggling the CS state. */ - static void cs_activate(struct atmel_spi *as, struct spi_device *spi) { struct atmel_spi_device *asd = spi->controller_state; + bool new_polarity; int chip_select; u32 mr; @@ -353,6 +411,25 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) } mr = spi_readl(as, MR); + + /* + * Ensures the clock polarity is valid before we actually + * assert the CS to avoid spurious clock edges to be + * processed by the spi devices. + */ + if (spi_get_csgpiod(spi, 0)) { + new_polarity = (asd->csr & SPI_BIT(CPOL)) != 0; + if (new_polarity != as->last_polarity) { + /* + * Need to disable the GPIO before sending the dummy + * message because it is already set by the spi core. + */ + gpiod_set_value_cansleep(spi_get_csgpiod(spi, 0), 0); + atmel_spi_send_dummy(as, spi, chip_select); + as->last_polarity = new_polarity; + gpiod_set_value_cansleep(spi_get_csgpiod(spi, 0), 1); + } + } } else { u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0; int i; @@ -1336,12 +1413,10 @@ static int atmel_spi_one_transfer(struct spi_controller *host, } dma_timeout = msecs_to_jiffies(spi_controller_xfer_timeout(host, xfer)); - ret_timeout = wait_for_completion_interruptible_timeout(&as->xfer_completion, - dma_timeout); - if (ret_timeout <= 0) { - dev_err(&spi->dev, "spi transfer %s\n", - !ret_timeout ? "timeout" : "canceled"); - as->done_status = ret_timeout < 0 ? ret_timeout : -EIO; + ret_timeout = wait_for_completion_timeout(&as->xfer_completion, dma_timeout); + if (!ret_timeout) { + dev_err(&spi->dev, "spi transfer timeout\n"); + as->done_status = -EIO; } if (as->done_status) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 1f2f8c717df6..a50eb4db79de 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -451,7 +451,6 @@ static int cdns_transfer_one(struct spi_controller *ctlr, udelay(10); cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0); - spi_transfer_delay_exec(transfer); cdns_spi_write(xspi, CDNS_SPI_IER, CDNS_SPI_IXR_DEFAULT); return transfer->len; diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 498e35c8db2c..272bc871a848 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -659,11 +659,18 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, ctrl |= (spi_imx->target_burst * 8 - 1) << MX51_ECSPI_CTRL_BL_OFFSET; else { - if (spi_imx->count >= 512) - ctrl |= 0xFFF << MX51_ECSPI_CTRL_BL_OFFSET; - else - ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) + if (spi_imx->usedma) { + ctrl |= (spi_imx->bits_per_word * + spi_imx_bytes_per_word(spi_imx->bits_per_word) - 1) << MX51_ECSPI_CTRL_BL_OFFSET; + } else { + if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST) + ctrl |= (MX51_ECSPI_CTRL_MAX_BURST - 1) + << MX51_ECSPI_CTRL_BL_OFFSET; + else + ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) + << MX51_ECSPI_CTRL_BL_OFFSET; + } } /* set clock speed */ diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index da2558e274b4..db9d9365ff55 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -8,6 +8,7 @@ * Vinayak Holikatti <h.vinayak@samsung.com> */ +#include <linux/clk.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pm_opp.h> @@ -213,6 +214,55 @@ static void ufshcd_init_lanes_per_dir(struct ufs_hba *hba) } } +/** + * ufshcd_parse_clock_min_max_freq - Parse MIN and MAX clocks freq + * @hba: per adapter instance + * + * This function parses MIN and MAX frequencies of all clocks required + * by the host drivers. + * + * Returns 0 for success and non-zero for failure + */ +static int ufshcd_parse_clock_min_max_freq(struct ufs_hba *hba) +{ + struct list_head *head = &hba->clk_list_head; + struct ufs_clk_info *clki; + struct dev_pm_opp *opp; + unsigned long freq; + u8 idx = 0; + + list_for_each_entry(clki, head, list) { + if (!clki->name) + continue; + + clki->clk = devm_clk_get(hba->dev, clki->name); + if (IS_ERR(clki->clk)) + continue; + + /* Find Max Freq */ + freq = ULONG_MAX; + opp = dev_pm_opp_find_freq_floor_indexed(hba->dev, &freq, idx); + if (IS_ERR(opp)) { + dev_err(hba->dev, "Failed to find OPP for MAX frequency\n"); + return PTR_ERR(opp); + } + clki->max_freq = dev_pm_opp_get_freq_indexed(opp, idx); + dev_pm_opp_put(opp); + + /* Find Min Freq */ + freq = 0; + opp = dev_pm_opp_find_freq_ceil_indexed(hba->dev, &freq, idx); + if (IS_ERR(opp)) { + dev_err(hba->dev, "Failed to find OPP for MIN frequency\n"); + return PTR_ERR(opp); + } + clki->min_freq = dev_pm_opp_get_freq_indexed(opp, idx++); + dev_pm_opp_put(opp); + } + + return 0; +} + static int ufshcd_parse_operating_points(struct ufs_hba *hba) { struct device *dev = hba->dev; @@ -279,6 +329,10 @@ static int ufshcd_parse_operating_points(struct ufs_hba *hba) return ret; } + ret = ufshcd_parse_clock_min_max_freq(hba); + if (ret) + return ret; + hba->use_pm_opp = true; return 0; diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 1ba0eeb7552a..0e6157982607 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1374,8 +1374,17 @@ retry: goto alloc_done; /* Don't retry from all devices if we're out of open buckets: */ - if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) - goto allocate_blocking; + if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { + int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, + target, erasure_code, + nr_replicas, &nr_effective, + &have_cache, watermark, + flags, cl); + if (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + goto alloc_done; + } /* * Only try to allocate cache (durability = 0 devices) from the @@ -1389,7 +1398,6 @@ retry: &have_cache, watermark, flags, cl); } else { -allocate_blocking: ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8e0fe65f6101..6be79129738d 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3214,10 +3214,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) mempool_exit(&c->btree_trans_pool); } -int bch2_fs_btree_iter_init(struct bch_fs *c) +void bch2_fs_btree_iter_init_early(struct bch_fs *c) { struct btree_transaction_stats *s; - int ret; for (s = c->btree_transaction_stats; s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); @@ -3228,6 +3227,11 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) INIT_LIST_HEAD(&c->btree_trans_list); seqmutex_init(&c->btree_trans_lock); +} + +int bch2_fs_btree_iter_init(struct bch_fs *c) +{ + int ret; c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf); if (!c->btree_trans_bufs) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 85e7cb52f6b6..eaffced4c132 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -938,6 +938,7 @@ unsigned bch2_trans_get_fn_idx(const char *); void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); void bch2_fs_btree_iter_exit(struct bch_fs *); +void bch2_fs_btree_iter_init_early(struct bch_fs *); int bch2_fs_btree_iter_init(struct bch_fs *); #endif /* _BCACHEFS_BTREE_ITER_H */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 26be38ab6ecb..239fcc3c7c99 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -99,7 +99,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) /* Calculate ideal packed bkey format for new btree nodes: */ -void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) +static void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b) { struct bkey_packed *k; struct bset_tree *t; @@ -125,21 +125,20 @@ static struct bkey_format bch2_btree_calc_format(struct btree *b) return bch2_bkey_format_done(&s); } -static size_t btree_node_u64s_with_format(struct btree *b, +static size_t btree_node_u64s_with_format(struct btree_nr_keys nr, + struct bkey_format *old_f, struct bkey_format *new_f) { - struct bkey_format *old_f = &b->format; - /* stupid integer promotion rules */ ssize_t delta = (((int) new_f->key_u64s - old_f->key_u64s) * - (int) b->nr.packed_keys) + + (int) nr.packed_keys) + (((int) new_f->key_u64s - BKEY_U64s) * - (int) b->nr.unpacked_keys); + (int) nr.unpacked_keys); - BUG_ON(delta + b->nr.live_u64s < 0); + BUG_ON(delta + nr.live_u64s < 0); - return b->nr.live_u64s + delta; + return nr.live_u64s + delta; } /** @@ -147,16 +146,18 @@ static size_t btree_node_u64s_with_format(struct btree *b, * * @c: filesystem handle * @b: btree node to rewrite + * @nr: number of keys for new node (i.e. b->nr) * @new_f: bkey format to translate keys to * * Returns: true if all re-packed keys will be able to fit in a new node. * * Assumes all keys will successfully pack with the new format. */ -bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, +static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, + struct btree_nr_keys nr, struct bkey_format *new_f) { - size_t u64s = btree_node_u64s_with_format(b, new_f); + size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f); return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c); } @@ -391,7 +392,7 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as, * The keys might expand with the new format - if they wouldn't fit in * the btree node anymore, use the old format for now: */ - if (!bch2_btree_node_format_fits(as->c, b, &format)) + if (!bch2_btree_node_format_fits(as->c, b, b->nr, &format)) format = b->format; SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1); @@ -1345,8 +1346,11 @@ static void __btree_split_node(struct btree_update *as, struct bkey_packed *out[2]; struct bkey uk; unsigned u64s, n1_u64s = (b->nr.live_u64s * 3) / 5; + struct { unsigned nr_keys, val_u64s; } nr_keys[2]; int i; + memset(&nr_keys, 0, sizeof(nr_keys)); + for (i = 0; i < 2; i++) { BUG_ON(n[i]->nsets != 1); @@ -1368,6 +1372,9 @@ static void __btree_split_node(struct btree_update *as, if (!i) n1_pos = uk.p; bch2_bkey_format_add_key(&format[i], &uk); + + nr_keys[i].nr_keys++; + nr_keys[i].val_u64s += bkeyp_val_u64s(&b->format, k); } btree_set_min(n[0], b->data->min_key); @@ -1380,6 +1387,12 @@ static void __btree_split_node(struct btree_update *as, bch2_bkey_format_add_pos(&format[i], n[i]->data->max_key); n[i]->data->format = bch2_bkey_format_done(&format[i]); + + unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s + + nr_keys[i].val_u64s; + if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c)) + n[i]->data->format = b->format; + btree_node_set_format(n[i], n[i]->data->format); } @@ -1822,8 +1835,8 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_bkey_format_add_pos(&new_s, next->data->max_key); new_f = bch2_bkey_format_done(&new_s); - sib_u64s = btree_node_u64s_with_format(b, &new_f) + - btree_node_u64s_with_format(m, &new_f); + sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) + + btree_node_u64s_with_format(m->nr, &m->format, &new_f); if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 031076e75fa1..a6668992a272 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -6,10 +6,6 @@ #include "btree_locking.h" #include "btree_update.h" -void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *); -bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *, - struct bkey_format *); - #define BTREE_UPDATE_NODES_MAX ((BTREE_MAX_DEPTH - 2) * 2 + GC_MERGE_NODES) #define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1)) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 2418c528c533..b05457d284a6 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -560,7 +560,8 @@ int bch2_data_update_init(struct btree_trans *trans, move_ctxt_wait_event(ctxt, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, PTR_BUCKET_POS(c, &p.ptr), 0)) || - !atomic_read(&ctxt->read_sectors)); + (!atomic_read(&ctxt->read_sectors) && + !atomic_read(&ctxt->write_sectors))); if (!locked) bch2_bucket_nocow_lock(&c->nocow_locks, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 371565e02ff2..ba93e32d7708 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1143,24 +1143,33 @@ static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_inode_info *dir = to_bch_ei(vdir); - - if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32)) - return FILEID_INVALID; + int min_len; if (!S_ISDIR(inode->v.i_mode) && dir) { struct bcachefs_fid_with_parent *fid = (void *) fh; + min_len = sizeof(*fid) / sizeof(u32); + if (*len < min_len) { + *len = min_len; + return FILEID_INVALID; + } + fid->fid = bch2_inode_to_fid(inode); fid->dir = bch2_inode_to_fid(dir); - *len = sizeof(*fid) / sizeof(u32); + *len = min_len; return FILEID_BCACHEFS_WITH_PARENT; } else { struct bcachefs_fid *fid = (void *) fh; + min_len = sizeof(*fid) / sizeof(u32); + if (*len < min_len) { + *len = min_len; + return FILEID_INVALID; + } *fid = bch2_inode_to_fid(inode); - *len = sizeof(*fid) / sizeof(u32); + *len = min_len; return FILEID_BCACHEFS_WITHOUT_PARENT; } } diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 852d30567da9..d266aae90200 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -10,6 +10,9 @@ extern const char * const bch2_recovery_passes[]; static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { + if (c->recovery_passes_explicit & BIT_ULL(pass)) + return 0; + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index f63474c5c5a2..818ec467a06b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -72,6 +72,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>"); MODULE_DESCRIPTION("bcachefs filesystem"); +MODULE_SOFTDEP("pre: crc32c"); +MODULE_SOFTDEP("pre: crc64"); +MODULE_SOFTDEP("pre: sha256"); +MODULE_SOFTDEP("pre: chacha20"); +MODULE_SOFTDEP("pre: poly1305"); +MODULE_SOFTDEP("pre: xxhash"); #define KTYPE(type) \ static const struct attribute_group type ## _group = { \ @@ -714,6 +720,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_copygc_init(c); bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); + bch2_fs_btree_iter_init_early(c); bch2_fs_btree_interior_update_init_early(c); bch2_fs_allocator_background_init(c); bch2_fs_allocator_foreground_init(c); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4e50b62db2a8..a1743904202b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1290,6 +1290,15 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, * are limited to own subvolumes only */ ret = -EPERM; + } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) { + /* + * Snapshots must be made with the src_inode referring + * to the subvolume inode, otherwise the permission + * checking above is useless because we may have + * permission on a lower directory but not the subvol + * itself. + */ + ret = -EINVAL; } else { ret = btrfs_mksnapshot(&file->f_path, idmap, name, namelen, diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index fdf2aad73470..e6beaaf4f170 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -26,8 +26,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int i; int flags = nfsexp_flags(rqstp, exp); - validate_process_creds(); - /* discard any old override before preparing the new set */ revert_creds(get_cred(current_real_cred())); new = prepare_creds(); @@ -81,10 +79,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - validate_process_creds(); put_cred(override_creds(new)); put_cred(new); - validate_process_creds(); return 0; oom: diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 92bc109dabe6..4039ffcf90ba 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -84,21 +84,7 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n) static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, size_t len) { - xdr_stream_encode_uint32_array(xdr, bitmap, len); -} - -static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, - struct nfs4_cb_fattr *fattr) -{ - fattr->ncf_cb_change = 0; - fattr->ncf_cb_fsize = 0; - if (bitmap[0] & FATTR4_WORD0_CHANGE) - if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0) - return -NFSERR_BAD_XDR; - if (bitmap[0] & FATTR4_WORD0_SIZE) - if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0) - return -NFSERR_BAD_XDR; - return 0; + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); } /* @@ -372,30 +358,6 @@ encode_cb_recallany4args(struct xdr_stream *xdr, } /* - * CB_GETATTR4args - * struct CB_GETATTR4args { - * nfs_fh4 fh; - * bitmap4 attr_request; - * }; - * - * The size and change attributes are the only one - * guaranteed to be serviced by the client. - */ -static void -encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, - struct nfs4_cb_fattr *fattr) -{ - struct nfs4_delegation *dp = - container_of(fattr, struct nfs4_delegation, dl_cb_fattr); - struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle; - - encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR); - encode_nfs_fh4(xdr, fh); - encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap)); - hdr->nops++; -} - -/* * CB_SEQUENCE4args * * struct CB_SEQUENCE4args { @@ -531,26 +493,6 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, } /* - * 20.1. Operation 3: CB_GETATTR - Get Attributes - */ -static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req, - struct xdr_stream *xdr, const void *data) -{ - const struct nfsd4_callback *cb = data; - struct nfs4_cb_fattr *ncf = - container_of(cb, struct nfs4_cb_fattr, ncf_getattr); - struct nfs4_cb_compound_hdr hdr = { - .ident = cb->cb_clp->cl_cb_ident, - .minorversion = cb->cb_clp->cl_minorversion, - }; - - encode_cb_compound4args(xdr, &hdr); - encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_getattr4args(xdr, &hdr, ncf); - encode_cb_nops(&hdr); -} - -/* * 20.2. Operation 4: CB_RECALL - Recall a Delegation */ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -606,42 +548,6 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, } /* - * 20.1. Operation 3: CB_GETATTR - Get Attributes - */ -static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, - struct xdr_stream *xdr, - void *data) -{ - struct nfsd4_callback *cb = data; - struct nfs4_cb_compound_hdr hdr; - int status; - u32 bitmap[3] = {0}; - u32 attrlen; - struct nfs4_cb_fattr *ncf = - container_of(cb, struct nfs4_cb_fattr, ncf_getattr); - - status = decode_cb_compound4res(xdr, &hdr); - if (unlikely(status)) - return status; - - status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_seq_status)) - return status; - - status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); - if (status) - return status; - if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) - return -NFSERR_BAD_XDR; - if (xdr_stream_decode_u32(xdr, &attrlen) < 0) - return -NFSERR_BAD_XDR; - if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize))) - return -NFSERR_BAD_XDR; - status = decode_cb_fattr4(xdr, bitmap, ncf); - return status; -} - -/* * 20.2. Operation 4: CB_RECALL - Recall a Delegation */ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, @@ -949,7 +855,6 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), - PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 40415929e2ae..3edbfa0233e6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -127,7 +127,6 @@ static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; -static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops; static struct workqueue_struct *laundry_wq; @@ -1190,10 +1189,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); - nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, - &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); - dp->dl_cb_fattr.ncf_file_modified = false; - dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; @@ -2901,56 +2896,11 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) spin_unlock(&nn->client_lock); } -static int -nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task) -{ - struct nfs4_cb_fattr *ncf = - container_of(cb, struct nfs4_cb_fattr, ncf_getattr); - - ncf->ncf_cb_status = task->tk_status; - switch (task->tk_status) { - case -NFS4ERR_DELAY: - rpc_delay(task, 2 * HZ); - return 0; - default: - return 1; - } -} - -static void -nfsd4_cb_getattr_release(struct nfsd4_callback *cb) -{ - struct nfs4_cb_fattr *ncf = - container_of(cb, struct nfs4_cb_fattr, ncf_getattr); - struct nfs4_delegation *dp = - container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - - nfs4_put_stid(&dp->dl_stid); - clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); - wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY); -} - static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { .done = nfsd4_cb_recall_any_done, .release = nfsd4_cb_recall_any_release, }; -static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = { - .done = nfsd4_cb_getattr_done, - .release = nfsd4_cb_getattr_release, -}; - -void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) -{ - struct nfs4_delegation *dp = - container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - - if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) - return; - refcount_inc(&dp->dl_stid.sc_count); - nfsd4_run_cb(&ncf->ncf_getattr); -} - static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -5685,8 +5635,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent = NULL; int cb_up; int status = 0; - struct kstat stat; - struct path path; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); open->op_recall = false; @@ -5724,18 +5672,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); - path.mnt = currentfh->fh_export->ex_path.mnt; - path.dentry = currentfh->fh_dentry; - if (vfs_getattr(&path, &stat, - (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), - AT_STATX_SYNC_AS_STAT)) { - nfs4_put_stid(&dp->dl_stid); - destroy_delegation(dp); - goto out_no_deleg; - } - dp->dl_cb_fattr.ncf_cur_fsize = stat.size; - dp->dl_cb_fattr.ncf_initial_cinfo = - nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry)); } else { open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); @@ -8492,8 +8428,6 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context * @inode: file to be checked for a conflict - * @modified: return true if file was modified - * @size: new size of file if modified is true * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server @@ -8502,23 +8436,21 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * delegation before replying to the GETATTR. See RFC 8881 section * 18.7.4. * + * The current implementation does not support CB_GETATTR yet. However + * this can avoid recalling the delegation could be added in follow up + * work. + * * Returns 0 if there is no conflict; otherwise an nfs_stat * code is returned. */ __be32 -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, - bool *modified, u64 *size) +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) { + __be32 status; struct file_lock_context *ctx; - struct nfs4_delegation *dp; - struct nfs4_cb_fattr *ncf; struct file_lock *fl; - struct iattr attrs; - __be32 status; - - might_sleep(); + struct nfs4_delegation *dp; - *modified = false; ctx = locks_inode_context(inode); if (!ctx) return 0; @@ -8545,34 +8477,10 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, break_lease: spin_unlock(&ctx->flc_lock); nfsd_stats_wdeleg_getattr_inc(); - - dp = fl->fl_owner; - ncf = &dp->dl_cb_fattr; - nfs4_cb_getattr(&dp->dl_cb_fattr); - wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE); - if (ncf->ncf_cb_status) { - status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); - if (status != nfserr_jukebox || - !nfsd_wait_for_delegreturn(rqstp, inode)) - return status; - } - if (!ncf->ncf_file_modified && - (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || - ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) - ncf->ncf_file_modified = true; - if (ncf->ncf_file_modified) { - /* - * The server would not update the file's metadata - * with the client's modified size. - */ - attrs.ia_mtime = attrs.ia_ctime = current_time(inode); - attrs.ia_valid = ATTR_MTIME | ATTR_CTIME; - setattr_copy(&nop_mnt_idmap, inode, &attrs); - mark_inode_dirty(inode); - ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; - *size = ncf->ncf_cur_fsize; - *modified = true; - } + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) + return status; return 0; } break; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ec4ed6206df1..b499fe9caa32 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3505,9 +3505,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, u32 attrmask[3]; unsigned long mask[2]; } u; - bool file_modified; unsigned long bit; - u64 size = 0; WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1); WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval)); @@ -3534,8 +3532,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, } args.size = 0; if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry), - &file_modified, &size); + status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry)); if (status) goto out; } @@ -3545,7 +3542,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; - args.size = file_modified ? size : args.stat.size; + args.size = args.stat.size; if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3e15b72f421d..7cd513e59305 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -705,8 +705,10 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - if (err >= 0 && - !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + else if (err >= 0 && + !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) svc_get(nn->nfsd_serv); nfsd_put(net); @@ -757,6 +759,9 @@ out_close: svc_xprt_put(xprt); } out_err: + if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + nfsd_put(net); return err; } @@ -1510,11 +1515,10 @@ int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb) int ret = -ENODEV; mutex_lock(&nfsd_mutex); - if (nn->nfsd_serv) { - svc_get(nn->nfsd_serv); + if (nn->nfsd_serv) ret = 0; - } - mutex_unlock(&nfsd_mutex); + else + mutex_unlock(&nfsd_mutex); return ret; } @@ -1686,8 +1690,6 @@ out: */ int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb) { - mutex_lock(&nfsd_mutex); - nfsd_put(sock_net(cb->skb->sk)); mutex_unlock(&nfsd_mutex); return 0; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index f5ff42f41ee7..3286ffacbc56 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -155,6 +155,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); +void nfsd_last_thread(struct net *net); extern int nfsd_max_blksize; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fe61d9bbcc1f..7a2bc8e82a63 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -542,7 +542,7 @@ static struct notifier_block nfsd_inet6addr_notifier = { /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); -static void nfsd_last_thread(struct net *net) +void nfsd_last_thread(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv = nn->nfsd_serv; @@ -955,7 +955,6 @@ nfsd(void *vrqstp) rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); - validate_process_creds(); } atomic_dec(&nfsdstats.th_cnt); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index f96eaa8e9413..41bdc913fa71 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -117,24 +117,6 @@ struct nfs4_cpntf_state { time64_t cpntf_time; /* last time stateid used */ }; -struct nfs4_cb_fattr { - struct nfsd4_callback ncf_getattr; - u32 ncf_cb_status; - u32 ncf_cb_bmap[1]; - - /* from CB_GETATTR reply */ - u64 ncf_cb_change; - u64 ncf_cb_fsize; - - unsigned long ncf_cb_flags; - bool ncf_file_modified; - u64 ncf_initial_cinfo; - u64 ncf_cur_fsize; -}; - -/* bits for ncf_cb_flags */ -#define CB_GETATTR_BUSY 0 - /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -168,9 +150,6 @@ struct nfs4_delegation { int dl_retries; struct nfsd4_callback dl_recall; bool dl_recalled; - - /* for CB_GETATTR */ - struct nfs4_cb_fattr dl_cb_fattr; }; #define cb_to_delegation(cb) \ @@ -661,7 +640,6 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, NFSPROC4_CLNT_CB_RECALL_ANY, - NFSPROC4_CLNT_CB_GETATTR, }; /* Returns true iff a is later than b: */ @@ -754,6 +732,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) } extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, - struct inode *inode, bool *file_modified, u64 *size); -extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf); + struct inode *inode); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fbbea7498f02..e01e4e2acbd9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -901,7 +901,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int host_err; bool retried = false; - validate_process_creds(); /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE @@ -926,7 +925,6 @@ retry: } err = nfserrno(host_err); } - validate_process_creds(); return err; } @@ -943,12 +941,7 @@ int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, struct file **filp) { - int err; - - validate_process_creds(); - err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); - validate_process_creds(); - return err; + return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); } /* diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index e8b00309c449..0d39af1b00a0 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -54,21 +54,3 @@ #define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) - -/* - * 1: CB_GETATTR opcode (32-bit) - * N: file_handle - * 1: number of entry in attribute array (32-bit) - * 1: entry 0 in attribute array (32-bit) - */ -#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \ - cb_sequence_enc_sz + \ - 1 + enc_nfs4_fh_sz + 1 + 1) -/* - * 4: fattr_bitmap_maxsz - * 1: attribute array len - * 2: change attr (64-bit) - * 2: size (64-bit) - */ -#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz) diff --git a/fs/open.c b/fs/open.c index 02dc608d40d8..3494a9cd8046 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1088,8 +1088,6 @@ struct file *dentry_open(const struct path *path, int flags, int error; struct file *f; - validate_creds(cred); - /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); @@ -1128,7 +1126,6 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, struct file *f; int error; - validate_creds(cred); f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 4382881b0709..8bea66c97316 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -753,15 +753,16 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) path.dentry = temp; err = ovl_copy_up_data(c, &path); /* - * We cannot hold lock_rename() throughout this helper, because or + * We cannot hold lock_rename() throughout this helper, because of * lock ordering with sb_writers, which shouldn't be held when calling * ovl_copy_up_data(), so lock workdir and destdir and make sure that * temp wasn't moved before copy up completion or cleanup. - * If temp was moved, abort without the cleanup. */ ovl_start_write(c->dentry); if (lock_rename(c->workdir, c->destdir) != NULL || temp->d_parent != c->workdir) { + /* temp or workdir moved underneath us? abort without cleanup */ + dput(temp); err = -EIO; goto unlock; } else if (err) { diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 59f6b8e32cc9..d64a306a414b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -291,16 +291,23 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId); #endif /* CIFS_DEBUG2 */ - rc = -EINVAL; + if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { spin_unlock(&cfids->cfid_list_lock); + rc = -EINVAL; + goto oshr_free; + } + + rc = smb2_parse_contexts(server, rsp_iov, + &oparms.fid->epoch, + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) { + spin_unlock(&cfids->cfid_list_lock); goto oshr_free; } - smb2_parse_contexts(server, o_rsp, - &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + rc = -EINVAL; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { spin_unlock(&cfids->cfid_list_lock); goto oshr_free; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 5596c9f30ccb..60027f5aebe8 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -40,11 +40,13 @@ void cifs_dump_detail(void *buf, struct TCP_Server_Info *server) #ifdef CONFIG_CIFS_DEBUG2 struct smb_hdr *smb = buf; - cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n", - smb->Command, smb->Status.CifsError, - smb->Flags, smb->Flags2, smb->Mid, smb->Pid); - cifs_dbg(VFS, "smb buf %p len %u\n", smb, - server->ops->calc_smb_size(smb)); + cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d Wct: %d\n", + smb->Command, smb->Status.CifsError, smb->Flags, + smb->Flags2, smb->Mid, smb->Pid, smb->WordCount); + if (!server->ops->check_message(buf, server->total_read, server)) { + cifs_dbg(VFS, "smb buf %p len %u\n", smb, + server->ops->calc_smb_size(smb)); + } #endif /* CONFIG_CIFS_DEBUG2 */ } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7558167f603c..55b3ce944022 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -532,7 +532,8 @@ struct smb_version_operations { struct mid_q_entry **, char **, int *); enum securityEnum (*select_sectype)(struct TCP_Server_Info *, enum securityEnum); - int (*next_header)(char *); + int (*next_header)(struct TCP_Server_Info *server, char *buf, + unsigned int *noff); /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 9dc6dc2754c2..dd2a1fb65e71 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1201,7 +1201,12 @@ next_pdu: server->total_read += length; if (server->ops->next_header) { - next_offset = server->ops->next_header(buf); + if (server->ops->next_header(server, buf, &next_offset)) { + cifs_dbg(VFS, "%s: malformed response (next_offset=%u)\n", + __func__, next_offset); + cifs_reconnect(server, true); + continue; + } if (next_offset) server->pdu_size = next_offset; } diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index cf17e3dd703e..32a8525415d9 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -4671,7 +4671,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, /* we do not want atime to be less than mtime, it broke some apps */ atime = inode_set_atime_to_ts(inode, current_time(inode)); mtime = inode_get_mtime(inode); - if (timespec64_compare(&atime, &mtime)) + if (timespec64_compare(&atime, &mtime) < 0) inode_set_atime_to_ts(inode, inode_get_mtime(inode)); if (PAGE_SIZE > rc) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 35b176457bbe..c2137ea3c253 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -363,6 +363,10 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) cifs_dbg(VFS, "Length less than smb header size\n"); } return -EIO; + } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) { + cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n", + __func__, smb->WordCount); + return -EIO; } /* otherwise, there is enough to get to the BCC */ diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 816e01c5589b..2d3b332a79a1 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -439,7 +439,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } - if (!chan_index && !iface) { + if (!iface) { cifs_dbg(FYI, "unable to get the interface matching: %pIS\n", &ss); spin_unlock(&ses->iface_lock); @@ -447,7 +447,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) } /* now drop the ref to the current iface */ - if (old_iface && iface) { + if (old_iface) { cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); @@ -460,44 +460,32 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) kref_put(&old_iface->refcount, release_iface); } else if (old_iface) { - cifs_dbg(FYI, "releasing ref to iface: %pIS\n", + /* if a new candidate is not found, keep things as is */ + cifs_dbg(FYI, "could not replace iface: %pIS\n", &old_iface->sockaddr); - - old_iface->num_channels--; - if (old_iface->weight_fulfilled) - old_iface->weight_fulfilled--; - - kref_put(&old_iface->refcount, release_iface); } else if (!chan_index) { /* special case: update interface for primary channel */ - cifs_dbg(FYI, "referencing primary channel iface: %pIS\n", - &iface->sockaddr); - iface->num_channels++; - iface->weight_fulfilled++; - } else { - WARN_ON(!iface); - cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); + if (iface) { + cifs_dbg(FYI, "referencing primary channel iface: %pIS\n", + &iface->sockaddr); + iface->num_channels++; + iface->weight_fulfilled++; + } } spin_unlock(&ses->iface_lock); - spin_lock(&ses->chan_lock); - chan_index = cifs_ses_get_chan_index(ses, server); - if (chan_index == CIFS_INVAL_CHAN_INDEX) { + if (iface) { + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + return 0; + } + + ses->chans[chan_index].iface = iface; spin_unlock(&ses->chan_lock); - return 0; } - ses->chans[chan_index].iface = iface; - - /* No iface is found. if secondary chan, drop connection */ - if (!iface && SERVER_IS_CHAN(server)) - ses->chans[chan_index].server = NULL; - - spin_unlock(&ses->chan_lock); - - if (!iface && SERVER_IS_CHAN(server)) - cifs_put_tcp_session(server, false); - return rc; } diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 32dfa0f7a78c..82b84a4941dd 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -173,6 +173,21 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) } mid = le64_to_cpu(shdr->MessageId); + if (check_smb2_hdr(shdr, mid)) + return 1; + + if (shdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { + cifs_dbg(VFS, "Invalid structure size %u\n", + le16_to_cpu(shdr->StructureSize)); + return 1; + } + + command = le16_to_cpu(shdr->Command); + if (command >= NUMBER_OF_SMB2_COMMANDS) { + cifs_dbg(VFS, "Invalid SMB2 command %d\n", command); + return 1; + } + if (len < pdu_size) { if ((len >= hdr_size) && (shdr->Status != 0)) { @@ -193,21 +208,6 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) return 1; } - if (check_smb2_hdr(shdr, mid)) - return 1; - - if (shdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { - cifs_dbg(VFS, "Invalid structure size %u\n", - le16_to_cpu(shdr->StructureSize)); - return 1; - } - - command = le16_to_cpu(shdr->Command); - if (command >= NUMBER_OF_SMB2_COMMANDS) { - cifs_dbg(VFS, "Invalid SMB2 command %d\n", command); - return 1; - } - if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { if (command != SMB2_OPLOCK_BREAK_HE && (shdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) { @@ -313,6 +313,9 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { char * smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) { + const int max_off = 4096; + const int max_len = 128 * 1024; + *off = 0; *len = 0; @@ -384,29 +387,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) * Invalid length or offset probably means data area is invalid, but * we have little choice but to ignore the data area in this case. */ - if (*off > 4096) { - cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off); - *len = 0; + if (unlikely(*off < 0 || *off > max_off || + *len < 0 || *len > max_len)) { + cifs_dbg(VFS, "%s: invalid data area (off=%d len=%d)\n", + __func__, *off, *len); *off = 0; - } else if (*off < 0) { - cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n", - *off); - *off = 0; - *len = 0; - } else if (*len < 0) { - cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n", - *len); *len = 0; - } else if (*len > 128 * 1024) { - cifs_dbg(VFS, "data area larger than 128K: %d\n", *len); + } else if (*off == 0) { *len = 0; } /* return pointer to beginning of data area, ie offset from SMB start */ - if ((*off != 0) && (*len != 0)) + if (*off > 0 && *len > 0) return (char *)shdr + *off; - else - return NULL; + return NULL; } /* diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index fcfb6566b899..66b310208545 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -403,8 +403,10 @@ smb2_dump_detail(void *buf, struct TCP_Server_Info *server) cifs_server_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n", shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId, shdr->Id.SyncId.ProcessId); - cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, - server->ops->calc_smb_size(buf)); + if (!server->ops->check_message(buf, server->total_read, server)) { + cifs_server_dbg(VFS, "smb buf %p len %u\n", buf, + server->ops->calc_smb_size(buf)); + } #endif } @@ -3003,7 +3005,7 @@ static int smb2_query_reparse_point(const unsigned int xid, struct kvec *rsp_iov; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; - u32 plen; + u32 off, count, len; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -3084,16 +3086,22 @@ static int smb2_query_reparse_point(const unsigned int xid, */ if (rc == 0) { /* See MS-FSCC 2.3.23 */ + off = le32_to_cpu(ioctl_rsp->OutputOffset); + count = le32_to_cpu(ioctl_rsp->OutputCount); + if (check_add_overflow(off, count, &len) || + len > rsp_iov[1].iov_len) { + cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n", + __func__, off, count); + rc = -EIO; + goto query_rp_exit; + } - reparse_buf = (struct reparse_data_buffer *) - ((char *)ioctl_rsp + - le32_to_cpu(ioctl_rsp->OutputOffset)); - plen = le32_to_cpu(ioctl_rsp->OutputCount); - - if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > - rsp_iov[1].iov_len) { - cifs_tcon_dbg(FYI, "srv returned invalid ioctl len: %d\n", - plen); + reparse_buf = (void *)((u8 *)ioctl_rsp + off); + len = sizeof(*reparse_buf); + if (count < len || + count < le16_to_cpu(reparse_buf->ReparseDataLength) + len) { + cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n", + __func__, off, count); rc = -EIO; goto query_rp_exit; } @@ -4943,6 +4951,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server, struct smb2_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; + unsigned int next_cmd; struct mid_q_entry *mid_entry; int next_is_large; char *next_buffer = NULL; @@ -4971,14 +4980,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server, next_is_large = server->large_buf; one_more: shdr = (struct smb2_hdr *)buf; - if (shdr->NextCommand) { + next_cmd = le32_to_cpu(shdr->NextCommand); + if (next_cmd) { + if (WARN_ON_ONCE(next_cmd > pdu_length)) + return -1; if (next_is_large) next_buffer = (char *)cifs_buf_get(); else next_buffer = (char *)cifs_small_buf_get(); - memcpy(next_buffer, - buf + le32_to_cpu(shdr->NextCommand), - pdu_length - le32_to_cpu(shdr->NextCommand)); + memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd); } mid_entry = smb2_find_mid(server, buf); @@ -5002,8 +5012,8 @@ one_more: else ret = cifs_handle_standard(server, mid_entry); - if (ret == 0 && shdr->NextCommand) { - pdu_length -= le32_to_cpu(shdr->NextCommand); + if (ret == 0 && next_cmd) { + pdu_length -= next_cmd; server->large_buf = next_is_large; if (next_is_large) server->bigbuf = buf = next_buffer; @@ -5066,17 +5076,22 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid) NULL, 0, false); } -static int -smb2_next_header(char *buf) +static int smb2_next_header(struct TCP_Server_Info *server, char *buf, + unsigned int *noff) { struct smb2_hdr *hdr = (struct smb2_hdr *)buf; struct smb2_transform_hdr *t_hdr = (struct smb2_transform_hdr *)buf; - if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) - return sizeof(struct smb2_transform_hdr) + - le32_to_cpu(t_hdr->OriginalMessageSize); - - return le32_to_cpu(hdr->NextCommand); + if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) { + *noff = le32_to_cpu(t_hdr->OriginalMessageSize); + if (unlikely(check_add_overflow(*noff, sizeof(*t_hdr), noff))) + return -EINVAL; + } else { + *noff = le32_to_cpu(hdr->NextCommand); + } + if (unlikely(*noff && *noff < MID_HEADER_SIZE(server))) + return -EINVAL; + return 0; } int cifs_sfu_make_node(unsigned int xid, struct inode *inode, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 20634fc6d4f0..4f971c1061f0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -411,8 +411,7 @@ skip_sess_setup: } if (smb2_command != SMB2_INTERNAL_CMD) - if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) - cifs_put_tcp_session(server, false); + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); atomic_inc(&tconInfoReconnectCount); out: @@ -471,10 +470,15 @@ static int __smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, void **request_buf, unsigned int *total_len) { /* BB eventually switch this to SMB2 specific small buf size */ - if (smb2_command == SMB2_SET_INFO) + switch (smb2_command) { + case SMB2_SET_INFO: + case SMB2_QUERY_INFO: *request_buf = cifs_buf_get(); - else + break; + default: *request_buf = cifs_small_buf_get(); + break; + } if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ return -ENOMEM; @@ -2236,17 +2240,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2255,45 +2260,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; + + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen >= doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } + break; + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ - - next = le32_to_cpu(cc->Next); - if (!next) + + off = le32_to_cpu(cc->Next); + if (!off) break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3124,8 +3147,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); @@ -3568,8 +3591,13 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, struct smb2_query_info_req *req; struct kvec *iov = rqst->rq_iov; unsigned int total_len; + size_t len; int rc; + if (unlikely(check_add_overflow(input_len, sizeof(*req), &len) || + len > CIFSMaxBufSize)) + return -EINVAL; + rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, server, (void **) &req, &total_len); if (rc) @@ -3591,7 +3619,7 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, iov[0].iov_base = (char *)req; /* 1 for Buffer */ - iov[0].iov_len = total_len - 1 + input_len; + iov[0].iov_len = len; return 0; } @@ -3599,7 +3627,7 @@ void SMB2_query_info_free(struct smb_rqst *rqst) { if (rqst && rqst->rq_iov) - cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ + cifs_buf_release(rqst->rq_iov[0].iov_base); /* request */ } static int @@ -5474,6 +5502,11 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, return 0; } +static inline void free_qfs_info_req(struct kvec *iov) +{ + cifs_buf_release(iov->iov_base); +} + int SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) @@ -5505,7 +5538,7 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(iov.iov_base); + free_qfs_info_req(&iov); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto posix_qfsinf_exit; @@ -5556,7 +5589,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(iov.iov_base); + free_qfs_info_req(&iov); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto qfsinf_exit; @@ -5623,7 +5656,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(iov.iov_base); + free_qfs_info_req(&iov); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto qfsattr_exit; diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 46eff9ec302a..0e371f7e2854 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -251,11 +251,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 0b90869fd805..43e237864a42 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -546,6 +546,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, if (strcmp(ei_child->name, name) != 0) continue; ret = simple_lookup(dir, dentry, flags); + if (IS_ERR(ret)) + goto out; create_dir_dentry(ei, ei_child, ei_dentry, true); created = true; break; @@ -568,6 +570,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, if (r <= 0) continue; ret = simple_lookup(dir, dentry, flags); + if (IS_ERR(ret)) + goto out; create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops, true); break; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index a44d9dc7e3eb..8e177b67e82f 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -5,6 +5,7 @@ #define _VIRTCHNL_H_ #include <linux/bitops.h> +#include <linux/bits.h> #include <linux/overflow.h> #include <uapi/linux/if_ether.h> diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2f54cc0436c4..7671530d6e4e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -52,10 +52,6 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; -struct bpf_token; -struct user_namespace; -struct super_block; -struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1488,7 +1484,6 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif - struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1613,31 +1608,6 @@ struct bpf_link_primer { u32 id; }; -struct bpf_mount_opts { - kuid_t uid; - kgid_t gid; - umode_t mode; - - /* BPF token-related delegation options */ - u64 delegate_cmds; - u64 delegate_maps; - u64 delegate_progs; - u64 delegate_attachs; -}; - -struct bpf_token { - struct work_struct work; - atomic64_t refcnt; - struct user_namespace *userns; - u64 allowed_cmds; - u64 allowed_maps; - u64 allowed_progs; - u64 allowed_attachs; -#ifdef CONFIG_SECURITY - void *security; -#endif -}; - struct bpf_struct_ops_value; struct btf_member; @@ -2097,7 +2067,6 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } -extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2232,26 +2201,24 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -bool bpf_token_capable(const struct bpf_token *token, int cap); - -static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) +static inline bool bpf_allow_ptr_leaks(void) { - return bpf_token_capable(token, CAP_PERFMON); + return perfmon_capable(); } -static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) +static inline bool bpf_allow_uninit_stack(void) { - return bpf_token_capable(token, CAP_PERFMON); + return perfmon_capable(); } -static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) +static inline bool bpf_bypass_spec_v1(void) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return cpu_mitigations_off() || perfmon_capable(); } -static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) +static inline bool bpf_bypass_spec_v4(void) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2268,21 +2235,8 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); -void bpf_token_inc(struct bpf_token *token); -void bpf_token_put(struct bpf_token *token); -int bpf_token_create(union bpf_attr *attr); -struct bpf_token *bpf_token_get_from_fd(u32 ufd); - -bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); -bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); -bool bpf_token_allow_prog_type(const struct bpf_token *token, - enum bpf_prog_type prog_type, - enum bpf_attach_type attach_type); - int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); -struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, - umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2512,12 +2466,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf_func_model *m); struct bpf_reg_state; -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); -int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); -int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg, u32 *nargs); +int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, @@ -2526,8 +2475,7 @@ const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, - const struct bpf_prog *prog); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2646,24 +2594,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline bool bpf_token_capable(const struct bpf_token *token, int cap) -{ - return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); -} - -static inline void bpf_token_inc(struct bpf_token *token) -{ -} - -static inline void bpf_token_put(struct bpf_token *token) -{ -} - -static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline void __dev_flush(void) { } @@ -2787,7 +2717,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fc0d6f32c687..94baced5a1ad 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -142,9 +142,13 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter) +BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx) +BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit) #endif #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) +BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c2819a6579a5..d07d857ca67f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -606,6 +606,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) #define BPF_MAX_SUBPROGS 256 +struct bpf_subprog_arg_info { + enum bpf_arg_type arg_type; + union { + u32 mem_size; + }; +}; + struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ @@ -617,6 +624,10 @@ struct bpf_subprog_info { bool is_cb: 1; bool is_async_cb: 1; bool is_exception_cb: 1; + bool args_cached: 1; + + u8 arg_cnt; + struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; }; struct bpf_verifier_env; @@ -727,6 +738,16 @@ struct bpf_verifier_env { char tmp_str_buf[TMP_STR_BUF_LEN]; }; +static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) +{ + return &env->prog->aux->func_info_aux[subprog]; +} + +static inline struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env, int subprog) +{ + return &env->subprog_info[subprog]; +} + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, @@ -764,14 +785,6 @@ bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); -int check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno); -int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type); -int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno, u32 mem_size); - /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, struct btf *btf, u32 btf_id) diff --git a/include/linux/connector.h b/include/linux/connector.h index cec2d99ae902..70bc1160f3d8 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -100,8 +100,7 @@ void cn_del_callback(const struct cb_id *id); */ int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask, - int (*filter)(struct sock *dsk, struct sk_buff *skb, - void *data), + netlink_filter_fn filter, void *filter_data); /** diff --git a/include/linux/cred.h b/include/linux/cred.h index af8d353a4b86..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -109,14 +109,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif + atomic_long_t usage; kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -229,7 +182,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) { - atomic_add(nr, &cred->usage); + atomic_long_add(nr, &cred->usage); return cred; } @@ -264,7 +217,6 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred_many(nonconst_cred, nr); } @@ -288,9 +240,8 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; - if (!atomic_inc_not_zero(&nonconst_cred->usage)) + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -312,8 +263,7 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); - if (atomic_sub_and_test(nr, &cred->usage)) + if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } } diff --git a/include/linux/damon.h b/include/linux/damon.h index ab2f17d9926b..e00ddf1ed39c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -559,6 +559,8 @@ struct damon_ctx { * update */ unsigned long next_ops_update_sis; + /* for waiting until the execution of the kdamond_fn is started */ + struct completion kdamond_started; /* public: */ struct task_struct *kdamond; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index cfcd952a1d4f..325e0778e937 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -411,8 +411,10 @@ struct ethtool_pause_stats { * not entire FEC data blocks. This is a non-standard statistic. * Reported to user space as %ETHTOOL_A_FEC_STAT_CORR_BITS. * - * @lane: per-lane/PCS-instance counts as defined by the standard - * @total: error counts for the entire port, for drivers incapable of reporting + * For each of the above fields, the two substructure members are: + * + * - @lanes: per-lane/PCS-instance counts as defined by the standard + * - @total: error counts for the entire port, for drivers incapable of reporting * per-lane stats * * Drivers should fill in either only total or per-lane statistics, core diff --git a/include/linux/filter.h b/include/linux/filter.h index 12d907f17d36..68fb6c8142fe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1139,7 +1139,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) + if (bpf_jit_harden == 1 && bpf_capable()) return false; return true; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2f5554482047..83c4d060a559 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4448,7 +4448,8 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && action != WLAN_PUB_ACTION_FTM_REQUEST && action != WLAN_PUB_ACTION_FTM_RESPONSE && - action != WLAN_PUB_ACTION_FILS_DISCOVERY; + action != WLAN_PUB_ACTION_FILS_DISCOVERY && + action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; } /** diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 805bb635cdf5..239a4f68801b 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -434,6 +434,7 @@ enum { /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, + REQ_F_POLL_NO_LAZY_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -501,6 +502,8 @@ enum { REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + /* don't use lazy poll wake for this request */ + REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); diff --git a/include/linux/ism.h b/include/linux/ism.h index 9a4c204df3da..5428edd90982 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -86,7 +86,6 @@ int ism_register_dmb(struct ism_dev *dev, struct ism_dmb *dmb, int ism_unregister_dmb(struct ism_dev *dev, struct ism_dmb *dmb); int ism_move(struct ism_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); -u8 *ism_get_seid(void); const struct smcd_ops *ism_get_smcd_ops(void); diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 3fdd00b452ac..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -398,17 +398,10 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) -LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) -LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) -LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) +LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7ee5b79ff3d6..2bba88c67f58 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -681,6 +681,7 @@ struct mlx5e_resources { struct mlx5_sq_bfreg bfreg; #define MLX5_MAX_NUM_TC 8 u32 tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC]; + bool tisn_valid; } hw_objs; struct net_device *uplink_netdev; struct mutex uplink_netdev_lock; @@ -821,6 +822,7 @@ struct mlx5_core_dev { struct blocking_notifier_head macsec_nh; #endif u64 num_ipsec_offloads; + struct mlx5_sd *sd; }; struct mlx5_db { @@ -1222,6 +1224,14 @@ static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) return dev->caps.embedded_cpu; } +static inline bool mlx5_core_is_mgmt_pf(const struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN_2(dev, local_mng_port_valid)) + return false; + + return MLX5_CAP_GEN_2(dev, local_mng_port); +} + static inline bool mlx5_core_is_ecpf_esw_manager(const struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fee20fc010c2..586569209254 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1954,8 +1954,10 @@ enum { struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; - u8 migratable[0x1]; - u8 reserved_at_81[0x1f]; + u8 migratable[0x1]; + u8 reserved_at_81[0x19]; + u8 local_mng_port[0x1]; + u8 reserved_at_9b[0x5]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -1973,7 +1975,13 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 allowed_object_for_other_vhca_access[0x40]; - u8 reserved_at_140[0x60]; + u8 reserved_at_140[0x20]; + + u8 reserved_at_160[0xa]; + u8 local_mng_port_valid[0x1]; + u8 reserved_at_16b[0x15]; + + u8 reserved_at_180[0x20]; u8 flow_table_type_2_type[0x8]; u8 reserved_at_1a8[0x3]; @@ -4030,8 +4038,13 @@ struct mlx5_ifc_nic_vport_context_bits { u8 affiliation_criteria[0x4]; u8 affiliated_vhca_id[0x10]; - u8 reserved_at_60[0xd0]; + u8 reserved_at_60[0xa0]; + u8 reserved_at_100[0x1]; + u8 sd_group[0x3]; + u8 reserved_at_104[0x1c]; + + u8 reserved_at_120[0x10]; u8 mtu[0x10]; u8 system_image_guid[0x40]; @@ -10116,8 +10129,7 @@ struct mlx5_ifc_mpir_reg_bits { u8 reserved_at_20[0x20]; u8 local_port[0x8]; - u8 reserved_at_28[0x15]; - u8 sd_group[0x3]; + u8 reserved_at_28[0x18]; u8 reserved_at_60[0x20]; }; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index fbb9bf447889..c36cc6d82926 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); +int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, u16 vport, u64 node_guid); diff --git a/include/linux/mm.h b/include/linux/mm.h index 418d26608ece..da5219b48d52 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -886,8 +886,8 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) */ static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) { - return vma->vm_start <= vma->vm_mm->brk && - vma->vm_end >= vma->vm_mm->start_brk; + return vma->vm_start < vma->vm_mm->brk && + vma->vm_end > vma->vm_mm->start_brk; } /* @@ -901,8 +901,8 @@ static inline bool vma_is_initial_stack(const struct vm_area_struct *vma) * its "stack". It's not even well-defined for programs written * languages like Go. */ - return vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack; + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; } static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 9ae7def16cb2..f4fe593c1400 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -232,22 +232,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* - * There are three common cases for this page: - * 1. If it's hot, e.g., freshly faulted in or previously hot and - * migrated, add it to the youngest generation. - * 2. If it's cold but can't be evicted immediately, i.e., an anon page - * not in swapcache or a dirty page pending writeback, add it to the - * second oldest generation. - * 3. Everything else (clean, cold) is added to the oldest generation. + * There are four common cases for this page: + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest + * generation, and it's protected over the rest below. + * 2. If it can't be evicted immediately, i.e., a dirty page pending + * writeback, add it to the second youngest generation. + * 3. If it should be evicted first, e.g., cold and clean from + * folio_rotate_reclaimable(), add it to the oldest generation. + * 4. Everything else falls between 2 & 3 above and is added to the + * second oldest generation if it's considered inactive, or the + * oldest generation otherwise. See lru_gen_is_active(). */ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->min_seq[type] + 1; - else + seq = lrugen->max_seq - 1; + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; + else + seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3c25226beeed..9db36e197712 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -505,33 +505,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * the old generation, is incremented when all its bins become empty. * * There are four operations: - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; - * 2. The first attempt to reclaim an memcg below low, which triggers + * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_YOUNG; - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * - * Note that memcg LRU only applies to global reclaim, and the round-robin - * incrementing of their max_seq counters ensures the eventual fairness to all - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * Notes: + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing + * of their max_seq counters ensures the eventual fairness to all eligible + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * 2. There are only two valid generations: old (seq) and young (seq+1). + * MEMCG_NR_GENS is set to three so that when reading the generation counter + * locklessly, a stale value (seq-1) does not wraparound to young. */ -#define MEMCG_NR_GENS 2 +#define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b935ee341b4..118c40258d07 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1329,6 +1329,9 @@ struct netdev_net_notifier { * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Deletes the MDB entry from dev. + * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], + * struct netlink_ext_ack *extack); + * Bulk deletes MDB entries from dev. * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, * struct netlink_callback *cb); * Dumps MDB entries from dev. The first argument (marker) in the netlink @@ -1611,6 +1614,9 @@ struct net_device_ops { int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); + int (*ndo_mdb_del_bulk)(struct net_device *dev, + struct nlattr *tb[], + struct netlink_ext_ack *extack); int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); @@ -2108,6 +2114,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; + netdev_features_t gso_partial_features; unsigned int real_num_tx_queues; unsigned int gso_max_size; unsigned int gso_ipv4_max_size; @@ -2142,6 +2149,7 @@ struct net_device { /* RX read-mostly hotpath */ __cacheline_group_begin(net_device_read_rx); + struct bpf_prog __rcu *xdp_prog; struct list_head ptype_specific; int ifindex; unsigned int real_num_rx_queues; @@ -2204,7 +2212,6 @@ struct net_device { netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; - netdev_features_t gso_partial_features; unsigned int min_mtu; unsigned int max_mtu; @@ -2318,7 +2325,6 @@ struct net_device { const unsigned char *dev_addr; unsigned int num_rx_queues; - struct bpf_prog __rcu *xdp_prog; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. diff --git a/include/linux/netlink.h b/include/linux/netlink.h index abe91ed6b9aa..1a4445bf2ab9 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -228,10 +228,12 @@ bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); + +typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data); + int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, - struct sk_buff *skb, void *data), + netlink_filter_fn filter, void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); diff --git a/include/linux/pci.h b/include/linux/pci.h index 60ca768bc867..dea043bc1e38 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1829,6 +1829,7 @@ extern bool pcie_ports_native; int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); int pci_enable_link_state(struct pci_dev *pdev, int state); +int pci_enable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1839,6 +1840,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } static inline int pci_enable_link_state(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 875439ab45de..d589f89c612c 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -99,72 +99,6 @@ static inline bool phylink_autoneg_inband(unsigned int mode) } /** - * phylink_pcs_neg_mode() - helper to determine PCS inband mode - * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. - * @interface: interface mode to be used - * @advertising: adertisement ethtool link mode mask - * - * Determines the negotiation mode to be used by the PCS, and returns - * one of: - * - * - %PHYLINK_PCS_NEG_NONE: interface mode does not support inband - * - %PHYLINK_PCS_NEG_OUTBAND: an out of band mode (e.g. reading the PHY) - * will be used. - * - %PHYLINK_PCS_NEG_INBAND_DISABLED: inband mode selected but autoneg - * disabled - * - %PHYLINK_PCS_NEG_INBAND_ENABLED: inband mode selected and autoneg enabled - * - * Note: this is for cases where the PCS itself is involved in negotiation - * (e.g. Clause 37, SGMII and similar) not Clause 73. - */ -static inline unsigned int phylink_pcs_neg_mode(unsigned int mode, - phy_interface_t interface, - const unsigned long *advertising) -{ - unsigned int neg_mode; - - switch (interface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: - case PHY_INTERFACE_MODE_USXGMII: - /* These protocols are designed for use with a PHY which - * communicates its negotiation result back to the MAC via - * inband communication. Note: there exist PHYs that run - * with SGMII but do not send the inband data. - */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; - break; - - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* 1000base-X is designed for use media-side for Fibre - * connections, and thus the Autoneg bit needs to be - * taken into account. We also do this for 2500base-X - * as well, but drivers may not support this, so may - * need to override this. - */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising)) - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED; - break; - - default: - neg_mode = PHYLINK_PCS_NEG_NONE; - break; - } - - return neg_mode; -} - -/** * struct phylink_link_state - link state structure * @advertising: ethtool bitmask containing advertised link modes * @lp_advertising: ethtool bitmask containing link partner advertised link diff --git a/include/linux/security.h b/include/linux/security.h index 00809d2d5c38..1d1df326c881 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,7 +32,6 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/sockptr.h> -#include <linux/bpf.h> struct linux_binprm; struct cred; @@ -2021,22 +2020,15 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_token; +struct bpf_prog_aux; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token); +extern int security_bpf_map_alloc(struct bpf_map *map); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token); -extern void security_bpf_prog_free(struct bpf_prog *prog); -extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); -extern void security_bpf_token_free(struct bpf_token *token); -extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); -extern int security_bpf_token_capable(const struct bpf_token *token, int cap); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2054,8 +2046,7 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) +static inline int security_bpf_map_alloc(struct bpf_map *map) { return 0; } @@ -2063,33 +2054,13 @@ static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *a static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog *prog) +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) { } - -static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - return 0; -} - -static inline void security_bpf_token_free(struct bpf_token *token) -{ } - -static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) -{ - return 0; -} - -static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) -{ - return 0; -} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ea5c8ab3ed00..a5ae952454c8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -754,7 +754,6 @@ typedef unsigned char *sk_buff_data_t; * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) - * @sp: the security path, used for xfrm * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header @@ -788,7 +787,6 @@ typedef unsigned char *sk_buff_data_t; * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) - * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash @@ -4009,6 +4007,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); +int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); diff --git a/include/net/act_api.h b/include/net/act_api.h index ea13e1e4a7c2..447985a45ef6 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -137,6 +137,7 @@ struct tc_action_ops { #ifdef CONFIG_NET_CLS_ACT +#define ACT_P_BOUND 0 #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 20988623c5cc..8f8dd9173714 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -189,6 +189,7 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 type; u8 val[16]; }; @@ -198,6 +199,7 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -212,6 +214,7 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; + u8 link_type; u8 val[16]; }; @@ -219,6 +222,8 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; + u8 bdaddr_type; + u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; @@ -534,7 +539,6 @@ struct hci_dev { struct work_struct tx_work; struct delayed_work le_scan_disable; - struct delayed_work le_scan_restart; struct sk_buff_head rx_q; struct sk_buff_head raw_q; @@ -952,7 +956,6 @@ void hci_inquiry_cache_flush(struct hci_dev *hdev); /* ----- HCI Connections ----- */ enum { HCI_CONN_AUTH_PEND, - HCI_CONN_REAUTH_PEND, HCI_CONN_ENCRYPT_PEND, HCI_CONN_RSWITCH_PEND, HCI_CONN_MODE_CHANGE_PEND, @@ -1227,11 +1230,11 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, continue; /* Match CIG ID if set */ - if (cig != BT_ISO_QOS_CIG_UNSET && cig != c->iso_qos.ucast.cig) + if (cig != c->iso_qos.ucast.cig) continue; /* Match CIS ID if set */ - if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis) + if (id != c->iso_qos.ucast.cis) continue; /* Match destination address if set */ @@ -1293,6 +1296,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, } static inline struct hci_conn * +hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || + c->state != state) + continue; + + if (handle == c->iso_qos.bcast.big) { + rcu_read_unlock(); + return c; + } + } + + rcu_read_unlock(); + + return NULL; +} + +static inline struct hci_conn * hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) { struct hci_conn_hash *h = &hdev->conn_hash; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index f79ce133e51a..cd95711b12b8 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -20,6 +20,7 @@ struct wpan_phy; struct wpan_phy_cca; struct cfg802154_scan_request; struct cfg802154_beacon_request; +struct ieee802154_addr; #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL struct ieee802154_llsec_device_key; @@ -77,6 +78,12 @@ struct cfg802154_ops { struct cfg802154_beacon_request *request); int (*stop_beacons)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); + int (*associate)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord); + int (*disassociate)(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target); #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL void (*get_llsec_table)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, @@ -304,6 +311,22 @@ struct ieee802154_coord_desc { }; /** + * struct ieee802154_pan_device - PAN device information + * @pan_id: the PAN ID of this device + * @mode: the preferred mode to reach the device + * @short_addr: the short address of this device + * @extended_addr: the extended address of this device + * @node: the list node + */ +struct ieee802154_pan_device { + __le16 pan_id; + u8 mode; + __le16 short_addr; + __le64 extended_addr; + struct list_head node; +}; + +/** * struct cfg802154_scan_request - Scan request * * @type: type of scan to be performed @@ -478,6 +501,13 @@ struct wpan_dev { /* fallback for acknowledgment bit setting */ bool ackreq; + + /* Associations */ + struct mutex association_lock; + struct ieee802154_pan_device *parent; + struct list_head children; + unsigned int max_associations; + unsigned int nchildren; }; #define to_phy(_dev) container_of(_dev, struct wpan_phy, dev) @@ -529,4 +559,46 @@ static inline const char *wpan_phy_name(struct wpan_phy *phy) void ieee802154_configure_durations(struct wpan_phy *phy, unsigned int page, unsigned int channel); +/** + * cfg802154_device_is_associated - Checks whether we are associated to any device + * @wpan_dev: the wpan device + * @return: true if we are associated + */ +bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev); + +/** + * cfg802154_device_is_parent - Checks if a device is our coordinator + * @wpan_dev: the wpan device + * @target: the expected parent + * @return: true if @target is our coordinator + */ +bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target); + +/** + * cfg802154_device_is_child - Checks whether a device is associated to us + * @wpan_dev: the wpan device + * @target: the expected child + * @return: the PAN device + */ +struct ieee802154_pan_device * +cfg802154_device_is_child(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target); + +/** + * cfg802154_set_max_associations - Limit the number of future associations + * @wpan_dev: the wpan device + * @max: the maximum number of devices we accept to associate + * @return: the old maximum value + */ +unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev, + unsigned int max); + +/** + * cfg802154_get_free_short_addr - Get a free address among the known devices + * @wpan_dev: the wpan device + * @return: a random short address expectedly unused on our PAN + */ +__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev); + #endif /* __NET_CFG802154_H */ diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 278e4c7d465c..6d3a20163260 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -85,8 +85,10 @@ FN(IPV6_NDISC_BAD_OPTIONS) \ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ - FN(TC_ERROR) \ + FN(TC_COOKIE_ERROR) \ FN(PACKET_SOCK_ERROR) \ + FN(TC_CHAIN_NOTFOUND) \ + FN(TC_RECLASSIFY_LOOP) \ FNe(MAX) /** @@ -377,13 +379,23 @@ enum skb_drop_reason { SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST, /** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */ SKB_DROP_REASON_QUEUE_PURGE, - /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */ - SKB_DROP_REASON_TC_ERROR, + /** + * @SKB_DROP_REASON_TC_COOKIE_ERROR: An error occurred whilst + * processing a tc ext cookie. + */ + SKB_DROP_REASON_TC_COOKIE_ERROR, /** * @SKB_DROP_REASON_PACKET_SOCK_ERROR: generic packet socket errors * after its filter matches an incoming packet. */ SKB_DROP_REASON_PACKET_SOCK_ERROR, + /** @SKB_DROP_REASON_TC_CHAIN_NOTFOUND: tc chain lookup failed. */ + SKB_DROP_REASON_TC_CHAIN_NOTFOUND, + /** + * @SKB_DROP_REASON_TC_RECLASSIFY_LOOP: tc exceeded max reclassify loop + * iterations. + */ + SKB_DROP_REASON_TC_RECLASSIFY_LOOP, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 82da359bca03..d17855c52ef9 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -172,8 +172,7 @@ void fib_rules_unregister(struct fib_rules_ops *); int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); -int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, - u32 flags); +int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table); bool fib_rule_matchall(const struct fib_rule *rule); int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index c53244f20437..e61469129402 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -8,16 +8,19 @@ #define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) +/* Binding to multicast group requires %CAP_NET_ADMIN */ +#define GENL_MCAST_CAP_NET_ADMIN BIT(0) +/* Binding to multicast group requires %CAP_SYS_ADMIN */ +#define GENL_MCAST_CAP_SYS_ADMIN BIT(1) + /** * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family - * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) - * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding + * @flags: GENL_MCAST_* flags */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; - u8 cap_sys_admin:1; }; struct genl_split_ops; @@ -51,6 +54,9 @@ struct genl_info; * @split_ops: the split do/dump form of operation definition * @n_split_ops: number of entries in @split_ops, not that with split do/dump * ops the number of entries is not the same as number of commands + * @sock_priv_size: the size of per-socket private memory + * @sock_priv_init: the per-socket private memory initializer + * @sock_priv_destroy: the per-socket private memory destructor * * Attribute policies (the combination of @policy and @maxattr fields) * can be attached at the family level or at the operation level. @@ -84,11 +90,17 @@ struct genl_family { const struct genl_multicast_group *mcgrps; struct module *module; + size_t sock_priv_size; + void (*sock_priv_init)(void *priv); + void (*sock_priv_destroy)(void *priv); + /* private: internal use only */ /* protocol family identifier */ int id; /* starting number of multicast group IDs in this family */ unsigned int mcgrp_offset; + /* list of per-socket privs */ + struct xarray *sock_privs; }; /** @@ -298,6 +310,8 @@ static inline bool genl_info_is_ntf(const struct genl_info *info) return !info->nlhdr; } +void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk); +void *genl_sk_priv_get(struct genl_family *family, struct sock *sk); int genl_register_family(struct genl_family *family); int genl_unregister_family(const struct genl_family *family); void genl_notify(const struct genl_family *family, struct sk_buff *skb, @@ -438,6 +452,35 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) } /** + * genlmsg_multicast_netns_filtered - multicast a netlink message + * to a specific netns with filter + * function + * @family: the generic netlink family + * @net: the net namespace + * @skb: netlink message as socket buffer + * @portid: own netlink portid to avoid sending to yourself + * @group: offset of multicast group in groups array + * @flags: allocation flags + * @filter: filter function + * @filter_data: filter function private data + * + * Return: 0 on success, negative error code for failure. + */ +static inline int +genlmsg_multicast_netns_filtered(const struct genl_family *family, + struct net *net, struct sk_buff *skb, + u32 portid, unsigned int group, gfp_t flags, + netlink_filter_fn filter, + void *filter_data) +{ + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; + return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group, + flags, filter, filter_data); +} + +/** * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @family: the generic netlink family * @net: the net namespace @@ -450,10 +493,8 @@ static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (WARN_ON_ONCE(group >= family->n_mcgrps)) - return -EINVAL; - group = family->mcgrp_offset + group; - return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); + return genlmsg_multicast_netns_filtered(family, net, skb, portid, + group, flags, NULL, NULL); } /** diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 063313df447d..4de858f9929e 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -125,6 +125,35 @@ struct ieee802154_hdr_fc { #endif }; +struct ieee802154_assoc_req_pl { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 reserved1:1, + device_type:1, + power_source:1, + rx_on_when_idle:1, + assoc_type:1, + reserved2:1, + security_cap:1, + alloc_addr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 alloc_addr:1, + security_cap:1, + reserved2:1, + assoc_type:1, + rx_on_when_idle:1, + power_source:1, + device_type:1, + reserved1:1; +#else +#error "Please fix <asm/byteorder.h>" +#endif +} __packed; + +struct ieee802154_assoc_resp_pl { + __le16 short_addr; + u8 status; +} __packed; + enum ieee802154_frame_version { IEEE802154_2003_STD, IEEE802154_2006_STD, @@ -140,6 +169,19 @@ enum ieee802154_addressing_mode { IEEE802154_EXTENDED_ADDRESSING, }; +enum ieee802154_association_status { + IEEE802154_ASSOCIATION_SUCCESSFUL = 0x00, + IEEE802154_PAN_AT_CAPACITY = 0x01, + IEEE802154_PAN_ACCESS_DENIED = 0x02, + IEEE802154_HOPPING_SEQUENCE_OFFSET_DUP = 0x03, + IEEE802154_FAST_ASSOCIATION_SUCCESSFUL = 0x80, +}; + +enum ieee802154_disassociation_reason { + IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE = 0x1, + IEEE802154_DEVICE_WISHES_TO_LEAVE = 0x2, +}; + struct ieee802154_hdr { struct ieee802154_hdr_fc fc; u8 seq; @@ -163,6 +205,24 @@ struct ieee802154_beacon_req_frame { struct ieee802154_mac_cmd_pl mac_pl; }; +struct ieee802154_association_req_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; + struct ieee802154_assoc_req_pl assoc_req_pl; +}; + +struct ieee802154_association_resp_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; + struct ieee802154_assoc_resp_pl assoc_resp_pl; +}; + +struct ieee802154_disassociation_notif_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; + u8 disassoc_pl; +}; + /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from * the contents of hdr will be, and the actual value of those bits in * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3ecfeadbfa06..7f1b38458743 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -88,7 +88,7 @@ struct inet_bind_bucket { unsigned short fast_sk_family; bool fast_ipv6_only; struct hlist_node node; - struct hlist_head owners; + struct hlist_head bhash2; }; struct inet_bind2_bucket { @@ -96,22 +96,17 @@ struct inet_bind2_bucket { int l3mdev; unsigned short port; #if IS_ENABLED(CONFIG_IPV6) - unsigned short family; -#endif - union { -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr v6_rcv_saddr; + unsigned short addr_type; + struct in6_addr v6_rcv_saddr; +#define rcv_saddr v6_rcv_saddr.s6_addr32[3] +#else + __be32 rcv_saddr; #endif - __be32 rcv_saddr; - }; /* Node in the bhash2 inet_bind_hashbucket chain */ struct hlist_node node; + struct hlist_node bhash_node; /* List of sockets hashed to this bucket */ struct hlist_head owners; - /* bhash has twsk in owners, but bhash2 has twsk in - * deathrow not to add a member in struct sock_common. - */ - struct hlist_head deathrow; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) @@ -241,7 +236,7 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, struct inet_bind2_bucket * inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - unsigned short port, int l3mdev, + struct inet_bind_bucket *tb, const struct sock *sk); void inet_bind2_bucket_destroy(struct kmem_cache *cachep, diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b14999ff55db..f28da08a37b4 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -75,13 +75,9 @@ struct inet_timewait_sock { struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; struct inet_bind2_bucket *tw_tb2; - struct hlist_node tw_bind2_node; }; #define tw_tclass tw_tos -#define twsk_for_each_bound_bhash2(__tw, list) \ - hlist_for_each_entry(__tw, list, tw_bind2_node) - static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 95ed495c3a40..9ba6413fd2e3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -179,9 +179,6 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; - - struct hlist_node gc_link; - struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -250,6 +247,19 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +static inline void fib6_clean_expires(struct fib6_info *f6i) +{ + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; +} + +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) +{ + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; +} + static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) @@ -257,11 +267,6 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i) return false; } -static inline bool fib6_has_expires(const struct fib6_info *f6i) -{ - return f6i->fib6_flags & RTF_EXPIRES; -} - /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely @@ -328,10 +333,8 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { - DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); - } } enum fib6_walk_state { @@ -385,7 +388,6 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; - struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -502,48 +504,6 @@ void fib6_gc_cleanup(void); int fib6_init(void); -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. - */ -static inline void fib6_set_expires_locked(struct fib6_info *f6i, - unsigned long expires) -{ - struct fib6_table *tb6; - - tb6 = f6i->fib6_table; - f6i->expires = expires; - if (tb6 && !fib6_has_expires(f6i)) - hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist); - f6i->fib6_flags |= RTF_EXPIRES; -} - -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. If fib6_table is NULL, the fib6_info will no be inserted into the - * list of GC candidates until it is inserted into a table. - */ -static inline void fib6_set_expires(struct fib6_info *f6i, - unsigned long expires) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_set_expires_locked(f6i, expires); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - -static inline void fib6_clean_expires_locked(struct fib6_info *f6i) -{ - if (fib6_has_expires(f6i)) - hlist_del_init(&f6i->gc_link); - f6i->fib6_flags &= ~RTF_EXPIRES; - f6i->expires = 0; -} - -static inline void fib6_clean_expires(struct fib6_info *f6i) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_clean_expires_locked(f6i); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 78d38dd88aba..cf25ea21d770 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -784,11 +784,6 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) cpu_to_be32(0x0000ffff))) == 0UL; } -static inline bool ipv6_addr_v4mapped_any(const struct in6_addr *a) -{ - return ipv6_addr_v4mapped(a) && ipv4_is_zeronet(a->s6_addr32[3]); -} - static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a) { return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]); diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index f9e88401d7da..8b2055d64a6b 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -80,7 +80,7 @@ struct iucv_array { u32 length; } __attribute__ ((aligned (8))); -extern struct bus_type iucv_bus; +extern const struct bus_type iucv_bus; extern struct device *iucv_root; /* @@ -489,7 +489,7 @@ struct iucv_interface { int (*path_sever)(struct iucv_path *path, u8 userdata[16]); int (*iucv_register)(struct iucv_handler *handler, int smp); void (*iucv_unregister)(struct iucv_handler *handler, int smp); - struct bus_type *bus; + const struct bus_type *bus; struct device *root; }; diff --git a/include/net/macsec.h b/include/net/macsec.h index ebf9bc54036a..dbd22180cc5c 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -247,6 +247,23 @@ struct macsec_secy { /** * struct macsec_context - MACsec context for hardware offloading + * @netdev: a valid pointer to a struct net_device if @offload == + * MACSEC_OFFLOAD_MAC + * @phydev: a valid pointer to a struct phy_device if @offload == + * MACSEC_OFFLOAD_PHY + * @offload: MACsec offload status + * @secy: pointer to a MACsec SecY + * @rx_sc: pointer to a RX SC + * @update_pn: when updating the SA, update the next PN + * @assoc_num: association number of the target SA + * @key: key of the target SA + * @rx_sa: pointer to an RX SA if a RX SA is added/updated/removed + * @tx_sa: pointer to an TX SA if a TX SA is added/updated/removed + * @tx_sc_stats: pointer to TX SC stats structure + * @tx_sa_stats: pointer to TX SA stats structure + * @rx_sc_stats: pointer to RX SC stats structure + * @rx_sa_stats: pointer to RX SA stats structure + * @dev_stats: pointer to dev stats structure */ struct macsec_context { union { @@ -277,6 +294,33 @@ struct macsec_context { /** * struct macsec_ops - MACsec offloading operations + * @mdo_dev_open: called when the MACsec interface transitions to the up state + * @mdo_dev_stop: called when the MACsec interface transitions to the down + * state + * @mdo_add_secy: called when a new SecY is added + * @mdo_upd_secy: called when the SecY flags are changed or the MAC address of + * the MACsec interface is changed + * @mdo_del_secy: called when the hw offload is disabled or the MACsec + * interface is removed + * @mdo_add_rxsc: called when a new RX SC is added + * @mdo_upd_rxsc: called when a certain RX SC is updated + * @mdo_del_rxsc: called when a certain RX SC is removed + * @mdo_add_rxsa: called when a new RX SA is added + * @mdo_upd_rxsa: called when a certain RX SA is updated + * @mdo_del_rxsa: called when a certain RX SA is removed + * @mdo_add_txsa: called when a new TX SA is added + * @mdo_upd_txsa: called when a certain TX SA is updated + * @mdo_del_txsa: called when a certain TX SA is removed + * @mdo_get_dev_stats: called when dev stats are read + * @mdo_get_tx_sc_stats: called when TX SC stats are read + * @mdo_get_tx_sa_stats: called when TX SA stats are read + * @mdo_get_rx_sc_stats: called when RX SC stats are read + * @mdo_get_rx_sa_stats: called when RX SA stats are read + * @mdo_insert_tx_tag: called to insert the TX tag + * @needed_headroom: number of bytes reserved at the beginning of the sk_buff + * for the TX tag + * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the + * TX tag */ struct macsec_ops { /* Device wide */ @@ -303,6 +347,11 @@ struct macsec_ops { int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx); + /* Offload tag */ + int (*mdo_insert_tx_tag)(struct phy_device *phydev, + struct sk_buff *skb); + unsigned int needed_headroom; + unsigned int needed_tailroom; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); @@ -325,4 +374,9 @@ static inline void *macsec_netdev_priv(const struct net_device *dev) return netdev_priv(dev); } +static inline u64 sci_to_cpu(sci_t sci) +{ + return be64_to_cpu((__force __be64)sci); +} + #endif /* _NET_MACSEC_H_ */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 692d5955911c..956c752ceb31 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -74,12 +74,13 @@ enum nf_flowtable_flags { }; struct nf_flowtable { - struct list_head list; - struct rhashtable rhashtable; - int priority; + unsigned int flags; /* readonly in datapath */ + int priority; /* control path (padding hole) */ + struct rhashtable rhashtable; /* datapath, read-mostly members come first */ + + struct list_head list; /* slowpath parts */ const struct nf_flowtable_type *type; struct delayed_work gc_work; - unsigned int flags; struct flow_block flow_block; struct rw_semaphore flow_block_lock; /* Guards flow_block */ possible_net_t net; diff --git a/include/net/netlink.h b/include/net/netlink.h index 28039e57070a..c19ff921b661 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1087,21 +1087,29 @@ static inline void nlmsg_free(struct sk_buff *skb) } /** - * nlmsg_multicast - multicast a netlink message + * nlmsg_multicast_filtered - multicast a netlink message with filter function * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags + * @filter: filter function + * @filter_data: filter function private data + * + * Return: 0 on success, negative error code for failure. */ -static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 portid, unsigned int group, gfp_t flags) +static inline int nlmsg_multicast_filtered(struct sock *sk, struct sk_buff *skb, + u32 portid, unsigned int group, + gfp_t flags, + netlink_filter_fn filter, + void *filter_data) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, portid, group, flags); + err = netlink_broadcast_filtered(sk, skb, portid, group, flags, + filter, filter_data); if (err > 0) err = 0; @@ -1109,6 +1117,21 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, } /** + * nlmsg_multicast - multicast a netlink message + * @sk: netlink socket to spread messages to + * @skb: netlink message as socket buffer + * @portid: own netlink portid to avoid sending to yourself + * @group: multicast group id + * @flags: allocation flags + */ +static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 portid, unsigned int group, gfp_t flags) +{ + return nlmsg_multicast_filtered(sk, skb, portid, group, flags, + NULL, NULL); +} + +/** * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 8cd9d141f5af..4c752f799957 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -78,6 +78,10 @@ enum nl802154_commands { NL802154_CMD_SCAN_DONE, NL802154_CMD_SEND_BEACONS, NL802154_CMD_STOP_BEACONS, + NL802154_CMD_ASSOCIATE, + NL802154_CMD_DISASSOCIATE, + NL802154_CMD_SET_MAX_ASSOCIATIONS, + NL802154_CMD_LIST_ASSOCIATIONS, /* add new commands above here */ @@ -147,6 +151,8 @@ enum nl802154_attrs { NL802154_ATTR_SCAN_DURATION, NL802154_ATTR_SCAN_DONE_REASON, NL802154_ATTR_BEACON_INTERVAL, + NL802154_ATTR_MAX_ASSOCIATIONS, + NL802154_ATTR_PEER, /* add attributes here, update the policy in nl802154.c */ @@ -385,8 +391,6 @@ enum nl802154_supported_bool_states { NL802154_SUPPORTED_BOOL_MAX = __NL802154_SUPPORTED_BOOL_AFTER_LAST - 1 }; -#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL - enum nl802154_dev_addr_modes { NL802154_DEV_ADDR_NONE, __NL802154_DEV_ADDR_INVALID, @@ -406,12 +410,26 @@ enum nl802154_dev_addr_attrs { NL802154_DEV_ADDR_ATTR_SHORT, NL802154_DEV_ADDR_ATTR_EXTENDED, NL802154_DEV_ADDR_ATTR_PAD, + NL802154_DEV_ADDR_ATTR_PEER_TYPE, /* keep last */ __NL802154_DEV_ADDR_ATTR_AFTER_LAST, NL802154_DEV_ADDR_ATTR_MAX = __NL802154_DEV_ADDR_ATTR_AFTER_LAST - 1 }; +enum nl802154_peer_type { + NL802154_PEER_TYPE_UNSPEC, + + NL802154_PEER_TYPE_PARENT, + NL802154_PEER_TYPE_CHILD, + + /* keep last */ + __NL802154_PEER_TYPE_AFTER_LAST, + NL802154_PEER_TYPE_MAX = __NL802154_PEER_TYPE_AFTER_LAST - 1 +}; + +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + enum nl802154_key_id_modes { NL802154_KEY_ID_MODE_IMPLICIT, NL802154_KEY_ID_MODE_INDEX, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a76c9171db0e..f308e8268651 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -154,12 +154,6 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } -static inline void tcf_set_drop_reason(struct tcf_result *res, - enum skb_drop_reason reason) -{ - res->drop_reason = reason; -} - static inline void __tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9fa1d0794dfa..1e200d9a066d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -275,24 +275,6 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } -struct tc_skb_cb { - struct qdisc_skb_cb qdisc_cb; - - u16 mru; - u8 post_ct:1; - u8 post_ct_snat:1; - u8 post_ct_dnat:1; - u16 zone; /* Only valid if post_ct = true */ -}; - -static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) -{ - struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; - - BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); - return cb; -} - static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, unsigned long cl, struct qdisc_walker *arg) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index dcb9160e6467..ba3e1b315de8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -19,6 +19,7 @@ #include <net/gen_stats.h> #include <net/rtnetlink.h> #include <net/flow_offload.h> +#include <linux/xarray.h> struct Qdisc_ops; struct qdisc_walker; @@ -332,7 +333,6 @@ struct tcf_result { }; const struct tcf_proto *goto_tp; }; - enum skb_drop_reason drop_reason; }; struct tcf_chain; @@ -457,6 +457,7 @@ struct tcf_chain { }; struct tcf_block { + struct xarray ports; /* datapath accessible */ /* Lock protects tcf_block and lifetime-management data of chains * attached to the block (refcnt, action_refcnt, explicitly_created). */ @@ -483,6 +484,8 @@ struct tcf_block { struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */ }; +struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index); + static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain) { return lockdep_is_held(&chain->filter_chain_lock); @@ -1037,6 +1040,37 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return skb; } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + u32 drop_reason; + + u16 zone; /* Only valid if post_ct = true */ + u16 mru; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + +static inline enum skb_drop_reason +tcf_get_drop_reason(const struct sk_buff *skb) +{ + return tc_skb_cb(skb)->drop_reason; +} + +static inline void tcf_set_drop_reason(const struct sk_buff *skb, + enum skb_drop_reason reason) +{ + tc_skb_cb(skb)->drop_reason = reason; +} + /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/include/net/smc.h b/include/net/smc.h index a002552be29c..c9dcb30e3fd9 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -52,9 +52,14 @@ struct smcd_dmb { struct smcd_dev; struct ism_client; +struct smcd_gid { + u64 gid; + u64 gid_ext; +}; + struct smcd_ops { - int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid, - u32 vid); + int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid, + u32 vid_valid, u32 vid); int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb, struct ism_client *client); int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb); @@ -62,14 +67,13 @@ struct smcd_ops { int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id); int (*set_vlan_required)(struct smcd_dev *dev); int (*reset_vlan_required)(struct smcd_dev *dev); - int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq, - u32 event_code, u64 info); + int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid, + u32 trigger_irq, u32 event_code, u64 info); int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx, bool sf, unsigned int offset, void *data, unsigned int size); int (*supports_v2)(void); - u8* (*get_system_eid)(void); - u64 (*get_local_gid)(struct smcd_dev *dev); + void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid); u16 (*get_chid)(struct smcd_dev *dev); struct device* (*get_dev)(struct smcd_dev *dev); }; diff --git a/include/net/sock.h b/include/net/sock.h index 8b6fe164b218..a7f815c7cfdf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -76,19 +76,6 @@ * the other protocols. */ -/* Define this to get the SOCK_DBG debugging facility. */ -#define SOCK_DEBUGGING -#ifdef SOCK_DEBUGGING -#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \ - printk(KERN_DEBUG msg); } while (0) -#else -/* Validate arguments and do nothing */ -static inline __printf(2, 3) -void SOCK_DEBUG(const struct sock *sk, const char *msg, ...) -{ -} -#endif - /* This is the per-socket lock. The spinlock provides a synchronization * between user contexts and software interrupt processing, whereas the * mini-semaphore synchronizes multiple users amongst themselves. @@ -277,8 +264,6 @@ struct sk_filter; * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes - * @__sk_flags_offset: empty field used to determine location of bitfield - * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -352,7 +337,6 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference - * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -544,7 +528,6 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; - struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -873,16 +856,6 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } -static inline void __sk_del_bind2_node(struct sock *sk) -{ - __hlist_del(&sk->sk_bind2_node); -} - -static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) -{ - hlist_add_head(&sk->sk_bind2_node, list); -} - #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -900,8 +873,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) -#define sk_for_each_bound_bhash2(__sk, list) \ - hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset @@ -2799,6 +2770,11 @@ static inline bool sk_is_tcp(const struct sock *sk) return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; } +static inline bool sk_is_stream_unix(const struct sock *sk) +{ + return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h deleted file mode 100644 index 4225fcb1c6ba..000000000000 --- a/include/net/tc_act/tc_ipt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NET_TC_IPT_H -#define __NET_TC_IPT_H - -#include <net/act_api.h> - -struct xt_entry_target; - -struct tcf_ipt { - struct tc_action common; - u32 tcfi_hook; - char *tcfi_tname; - struct xt_entry_target *tcfi_t; -}; -#define to_ipt(a) ((struct tcf_ipt *)a) - -#endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 32ce8ea36950..75722d967bf2 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,6 +8,7 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; + u32 tcfm_blockid; bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; netdevice_tracker tcfm_dev_tracker; diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index a6d481b5bcbc..a608546bcefc 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -117,10 +117,6 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, if (a->ops->act == tcf_ife_act) return tcf_ife_act(skb, a, res); #endif -#if IS_BUILTIN(CONFIG_NET_ACT_IPT) - if (a->ops->act == tcf_ipt_act) - return tcf_ipt_act(skb, a, res); -#endif #if IS_BUILTIN(CONFIG_NET_ACT_SIMP) if (a->ops->act == tcf_simp_act) return tcf_simp_act(skb, a, res); diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index b62bb8525a5f..526c1e7f505e 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -12,14 +12,14 @@ #define XDP_UMEM_MIN_CHUNK_SHIFT 11 #define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) -#ifdef CONFIG_XDP_SOCKETS - struct xsk_cb_desc { void *src; u8 off; u8 bytes; }; +#ifdef CONFIG_XDP_SOCKETS + void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f4d3090efe..754e68ca8744 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. - * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3c8383d342d..130cae0d3e20 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -139,6 +139,8 @@ enum devlink_command { DEVLINK_CMD_SELFTESTS_GET, /* can dump */ DEVLINK_CMD_SELFTESTS_RUN, + DEVLINK_CMD_NOTIFY_FILTER_SET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 0787d561ace0..06ef6b78b7de 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2002,6 +2002,7 @@ static inline int ethtool_validate_duplex(__u8 duplex) * be exploited to reduce the RSS queue spread. */ #define RXH_XFRM_SYM_XOR (1 << 0) +#define RXH_XFRM_NO_CHANGE 0xff /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ @@ -2139,18 +2140,6 @@ enum ethtool_reset_flags { * refused. For drivers: ignore this field (use kernel's * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will * be overwritten by kernel. - * @supported: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features for which the interface - * supports autonegotiation or auto-detection. Read-only. - * @advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features that are advertised through - * autonegotiation or enabled for auto-detection. - * @lp_advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, and other - * link features that the link partner advertised through - * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. @@ -2192,6 +2181,21 @@ enum ethtool_reset_flags { * %set_link_ksettings() should validate all fields other than @cmd * and @link_mode_masks_nwords that are not described as read-only or * deprecated, and must ignore all fields described as read-only. + * + * @link_mode_masks is divided into three bitfields, each of length + * @link_mode_masks_nwords: + * - supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * - advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * - lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { __u32 cmd; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2e23f99dc0f1..a5b743a2f775 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -757,6 +757,7 @@ enum { MDBE_ATTR_VNI, MDBE_ATTR_IFINDEX, MDBE_ATTR_SRC_VNI, + MDBE_ATTR_STATE_MASK, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index c7082cc60d21..ea277039f89d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -99,7 +99,7 @@ enum { * versions. */ #define TCA_ACT_GACT 5 -#define TCA_ACT_IPT 6 +#define TCA_ACT_IPT 6 /* obsoleted, can be reused */ #define TCA_ACT_PEDIT 7 #define TCA_ACT_MIRRED 8 #define TCA_ACT_NAT 9 @@ -120,7 +120,7 @@ enum tca_id { TCA_ID_UNSPEC = 0, TCA_ID_POLICE = 1, TCA_ID_GACT = TCA_ACT_GACT, - TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_IPT = TCA_ACT_IPT, /* Obsoleted, can be reused */ TCA_ID_PEDIT = TCA_ACT_PEDIT, TCA_ID_MIRRED = TCA_ACT_MIRRED, TCA_ID_NAT = TCA_ACT_NAT, @@ -280,37 +280,6 @@ struct tc_u32_pcnt { #define TC_U32_MAXDEPTH 8 - -/* RSVP filter */ - -enum { - TCA_RSVP_UNSPEC, - TCA_RSVP_CLASSID, - TCA_RSVP_DST, - TCA_RSVP_SRC, - TCA_RSVP_PINFO, - TCA_RSVP_POLICE, - TCA_RSVP_ACT, - __TCA_RSVP_MAX -}; - -#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) - -struct tc_rsvp_gpi { - __u32 key; - __u32 mask; - int offset; -}; - -struct tc_rsvp_pinfo { - struct tc_rsvp_gpi dpi; - struct tc_rsvp_gpi spi; - __u8 protocol; - __u8 tunnelid; - __u8 tunnelhdr; - __u8 pad; -}; - /* ROUTE filter */ enum { @@ -341,22 +310,6 @@ enum { #define TCA_FW_MAX (__TCA_FW_MAX - 1) -/* TC index filter */ - -enum { - TCA_TCINDEX_UNSPEC, - TCA_TCINDEX_HASH, - TCA_TCINDEX_MASK, - TCA_TCINDEX_SHIFT, - TCA_TCINDEX_FALL_THROUGH, - TCA_TCINDEX_CLASSID, - TCA_TCINDEX_POLICE, - TCA_TCINDEX_ACT, - __TCA_TCINDEX_MAX -}; - -#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) - /* Flow filter */ enum { diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f762a10bfb78..a3cd0c2dc995 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -477,115 +477,6 @@ enum { #define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt { - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt { - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl { - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u16 pad; - __u32 penalty; -}; - -struct tc_cbq_police { - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt { - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats { - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum { - TCA_CBQ_UNSPEC, - TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, - __TCA_CBQ_MAX, -}; - -#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) - -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE, - __TCA_DSMARK_MAX, -}; - -#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) - -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ - __TCA_ATM_MAX, -}; - -#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) - /* Network emulator */ enum { diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 837fcd4b0abc..b531e3ef011a 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -160,6 +160,8 @@ enum { SMC_NLA_LGR_D_CHID, /* u16 */ SMC_NLA_LGR_D_PAD, /* flag */ SMC_NLA_LGR_D_V2_COMMON, /* nest */ + SMC_NLA_LGR_D_EXT_GID, /* u64 */ + SMC_NLA_LGR_D_PEER_EXT_GID, /* u64 */ __SMC_NLA_LGR_D_MAX, SMC_NLA_LGR_D_MAX = __SMC_NLA_LGR_D_MAX - 1 }; diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 8cb3a6fef553..58eceb7f5df2 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -107,6 +107,8 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ __aligned_u64 my_gid; /* My GID */ __aligned_u64 token; /* Token of DMB */ __aligned_u64 peer_token; /* Token of remote DMBE */ + __aligned_u64 peer_gid_ext; /* Peer GID (extended part) */ + __aligned_u64 my_gid_ext; /* My GID (extended part) */ }; #endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h deleted file mode 100644 index c48d7da6750d..000000000000 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_TC_IPT_H -#define __LINUX_TC_IPT_H - -#include <linux/pkt_cls.h> - -enum { - TCA_IPT_UNSPEC, - TCA_IPT_TABLE, - TCA_IPT_HOOK, - TCA_IPT_INDEX, - TCA_IPT_CNT, - TCA_IPT_TM, - TCA_IPT_TARG, - TCA_IPT_PAD, - __TCA_IPT_MAX -}; -#define TCA_IPT_MAX (__TCA_IPT_MAX - 1) - -#endif diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h index 2500a0005d05..c61e76f3c23b 100644 --- a/include/uapi/linux/tc_act/tc_mirred.h +++ b/include/uapi/linux/tc_act/tc_mirred.h @@ -21,6 +21,7 @@ enum { TCA_MIRRED_TM, TCA_MIRRED_PARMS, TCA_MIRRED_PAD, + TCA_MIRRED_BLOCKID, __TCA_MIRRED_MAX }; #define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1) diff --git a/io_uring/poll.c b/io_uring/poll.c index d38d05edb4fa..d59b74a99d4e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -366,11 +366,16 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) static void __io_poll_execute(struct io_kiocb *req, int mask) { + unsigned flags = 0; + io_req_set_res(req, mask, 0); req->io_task_work.func = io_poll_task_func; trace_io_uring_task_add(req, mask); - __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE); + + if (!(req->flags & REQ_F_POLL_NO_LAZY)) + flags = IOU_F_TWQ_LAZY_WAKE; + __io_req_task_work_add(req, flags); } static inline void io_poll_execute(struct io_kiocb *req, int res) @@ -526,10 +531,19 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, poll->head = head; poll->wait.private = (void *) wqe_private; - if (poll->events & EPOLLEXCLUSIVE) + if (poll->events & EPOLLEXCLUSIVE) { + /* + * Exclusive waits may only wake a limited amount of entries + * rather than all of them, this may interfere with lazy + * wake if someone does wait(events > 1). Ensure we don't do + * lazy wake for those, as we need to process each one as they + * come in. + */ + req->flags |= REQ_F_POLL_NO_LAZY; add_wait_queue_exclusive(head, &poll->wait); - else + } else { add_wait_queue(head, &poll->wait); + } } static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index acbc2924ecd2..7d3ef62e620a 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -7,7 +7,7 @@ #include <linux/nospec.h> #include <uapi/linux/io_uring.h> -#include <uapi/asm-generic/ioctls.h> +#include <asm/ioctls.h> #include "io_uring.h" #include "rsrc.h" diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 1cc3b1c595d7..2fd510256604 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -94,7 +94,6 @@ config KEXEC_JUMP config CRASH_DUMP bool "kernel crash dumps" depends on ARCH_SUPPORTS_CRASH_DUMP - depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE help diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 4ce95acfcaa7..f526b7573e97 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 13358675ff2e..0bdbbbeab155 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; - bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL); + bool bypass_spec_v1 = bpf_bypass_spec_v1(); u64 array_size, mask64; struct bpf_array *array; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 63b4dc495125..e8e910395bf6 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -260,15 +260,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) BTF_SET_START(sleepable_lsm_hooks) BTF_ID(func, bpf_lsm_bpf) BTF_ID(func, bpf_lsm_bpf_map) -BTF_ID(func, bpf_lsm_bpf_map_create) -BTF_ID(func, bpf_lsm_bpf_map_free) +BTF_ID(func, bpf_lsm_bpf_map_alloc_security) +BTF_ID(func, bpf_lsm_bpf_map_free_security) BTF_ID(func, bpf_lsm_bpf_prog) -BTF_ID(func, bpf_lsm_bpf_prog_load) -BTF_ID(func, bpf_lsm_bpf_prog_free) -BTF_ID(func, bpf_lsm_bpf_token_create) -BTF_ID(func, bpf_lsm_bpf_token_free) -BTF_ID(func, bpf_lsm_bpf_token_cmd) -BTF_ID(func, bpf_lsm_bpf_token_capable) BTF_ID(func, bpf_lsm_bprm_check_security) BTF_ID(func, bpf_lsm_bprm_committed_creds) BTF_ID(func, bpf_lsm_bprm_committing_creds) @@ -363,8 +357,9 @@ BTF_ID(func, bpf_lsm_userns_create) BTF_SET_END(sleepable_lsm_hooks) BTF_SET_START(untrusted_lsm_hooks) -BTF_ID(func, bpf_lsm_bpf_map_free) -BTF_ID(func, bpf_lsm_bpf_prog_free) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) +BTF_ID(func, bpf_lsm_bpf_prog_free_security) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_ID(func, bpf_lsm_file_free_security) #ifdef CONFIG_SECURITY_NETWORK diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d56433bf8aba..51e8b4bee0c8 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6765,222 +6765,64 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr return btf_check_func_type_match(log, btf1, t1, btf2, t2); } -static int btf_check_func_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - bool ptr_to_mem_ok, - bool processing_call) +static bool btf_is_dynptr_ptr(const struct btf *btf, const struct btf_type *t) { - enum bpf_prog_type prog_type = resolve_prog_type(env->prog); - struct bpf_verifier_log *log = &env->log; - const char *func_name, *ref_tname; - const struct btf_type *t, *ref_t; - const struct btf_param *args; - u32 i, nargs, ref_id; - int ret; - - t = btf_type_by_id(btf, func_id); - if (!t || !btf_type_is_func(t)) { - /* These checks were already done by the verifier while loading - * struct bpf_func_info or in add_kfunc_call(). - */ - bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n", - func_id); - return -EFAULT; - } - func_name = btf_name_by_offset(btf, t->name_off); - - t = btf_type_by_id(btf, t->type); - if (!t || !btf_type_is_func_proto(t)) { - bpf_log(log, "Invalid BTF of func %s\n", func_name); - return -EFAULT; - } - args = (const struct btf_param *)(t + 1); - nargs = btf_type_vlen(t); - if (nargs > MAX_BPF_FUNC_REG_ARGS) { - bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs, - MAX_BPF_FUNC_REG_ARGS); - return -EINVAL; - } - - /* check that BTF function arguments match actual types that the - * verifier sees. - */ - for (i = 0; i < nargs; i++) { - enum bpf_arg_type arg_type = ARG_DONTCARE; - u32 regno = i + 1; - struct bpf_reg_state *reg = ®s[regno]; - - t = btf_type_skip_modifiers(btf, args[i].type, NULL); - if (btf_type_is_scalar(t)) { - if (reg->type == SCALAR_VALUE) - continue; - bpf_log(log, "R%d is not a scalar\n", regno); - return -EINVAL; - } - - if (!btf_type_is_ptr(t)) { - bpf_log(log, "Unrecognized arg#%d type %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - - ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); - ref_tname = btf_name_by_offset(btf, ref_t->name_off); - - ret = check_func_arg_reg_off(env, reg, regno, arg_type); - if (ret < 0) - return ret; + const char *name; - if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { - /* If function expects ctx type in BTF check that caller - * is passing PTR_TO_CTX. - */ - if (reg->type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - } else if (ptr_to_mem_ok && processing_call) { - const struct btf_type *resolve_ret; - u32 type_size; + t = btf_type_by_id(btf, t->type); /* skip PTR */ - resolve_ret = btf_resolve_size(btf, ref_t, &type_size); - if (IS_ERR(resolve_ret)) { - bpf_log(log, - "arg#%d reference type('%s %s') size cannot be determined: %ld\n", - i, btf_type_str(ref_t), ref_tname, - PTR_ERR(resolve_ret)); - return -EINVAL; - } + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); - if (check_mem_reg(env, reg, regno, type_size)) - return -EINVAL; - } else { - bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i, - func_name, func_id); - return -EINVAL; - } + /* allow either struct or struct forward declaration */ + if (btf_type_is_struct(t) || + (btf_type_is_fwd(t) && btf_type_kflag(t) == 0)) { + name = btf_str_by_offset(btf, t->name_off); + return name && strcmp(name, "bpf_dynptr") == 0; } - return 0; -} - -/* Compare BTF of a function declaration with given bpf_reg_state. - * Returns: - * EFAULT - there is a verifier bug. Abort verification. - * EINVAL - there is a type mismatch or BTF is not available. - * 0 - BTF matches with what bpf_reg_state expects. - * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - */ -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) -{ - struct bpf_prog *prog = env->prog; - struct btf *btf = prog->aux->btf; - bool is_global; - u32 btf_id; - int err; - - if (!prog->aux->func_info) - return -EINVAL; - - btf_id = prog->aux->func_info[subprog].type_id; - if (!btf_id) - return -EFAULT; - - if (prog->aux->func_info_aux[subprog].unreliable) - return -EINVAL; - - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false); - - /* Compiler optimizations can remove arguments from static functions - * or mismatched type can be passed into a global function. - * In such cases mark the function as unreliable from BTF point of view. - */ - if (err) - prog->aux->func_info_aux[subprog].unreliable = true; - return err; -} - -/* Compare BTF of a function call with given bpf_reg_state. - * Returns: - * EFAULT - there is a verifier bug. Abort verification. - * EINVAL - there is a type mismatch or BTF is not available. - * 0 - BTF matches with what bpf_reg_state expects. - * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - * - * NOTE: the code is duplicated from btf_check_subprog_arg_match() - * because btf_check_func_arg_match() is still doing both. Once that - * function is split in 2, we can call from here btf_check_subprog_arg_match() - * first, and then treat the calling part in a new code path. - */ -int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) -{ - struct bpf_prog *prog = env->prog; - struct btf *btf = prog->aux->btf; - bool is_global; - u32 btf_id; - int err; - - if (!prog->aux->func_info) - return -EINVAL; - - btf_id = prog->aux->func_info[subprog].type_id; - if (!btf_id) - return -EFAULT; - - if (prog->aux->func_info_aux[subprog].unreliable) - return -EINVAL; - - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true); - - /* Compiler optimizations can remove arguments from static functions - * or mismatched type can be passed into a global function. - * In such cases mark the function as unreliable from BTF point of view. - */ - if (err) - prog->aux->func_info_aux[subprog].unreliable = true; - return err; + return false; } -/* Convert BTF of a function into bpf_reg_state if possible +/* Process BTF of a function to produce high-level expectation of function + * arguments (like ARG_PTR_TO_CTX, or ARG_PTR_TO_MEM, etc). This information + * is cached in subprog info for reuse. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - cannot convert BTF. - * 0 - Successfully converted BTF into bpf_reg_state - * (either PTR_TO_CTX or SCALAR_VALUE). + * 0 - Successfully processed BTF and constructed argument expectations. */ -int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs, u32 *arg_cnt) +int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) { + bool is_global = subprog_aux(env, subprog)->linkage == BTF_FUNC_GLOBAL; + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_log *log = &env->log; struct bpf_prog *prog = env->prog; enum bpf_prog_type prog_type = prog->type; struct btf *btf = prog->aux->btf; const struct btf_param *args; - const struct btf_type *t, *ref_t; + const struct btf_type *t, *ref_t, *fn_t; u32 i, nargs, btf_id; const char *tname; - if (!prog->aux->func_info || - prog->aux->func_info_aux[subprog].linkage != BTF_FUNC_GLOBAL) { + if (sub->args_cached) + return 0; + + if (!prog->aux->func_info) { bpf_log(log, "Verifier bug\n"); return -EFAULT; } btf_id = prog->aux->func_info[subprog].type_id; if (!btf_id) { + if (!is_global) /* not fatal for static funcs */ + return -EINVAL; bpf_log(log, "Global functions need valid BTF\n"); return -EFAULT; } - t = btf_type_by_id(btf, btf_id); - if (!t || !btf_type_is_func(t)) { + fn_t = btf_type_by_id(btf, btf_id); + if (!fn_t || !btf_type_is_func(fn_t)) { /* These checks were already done by the verifier while loading * struct bpf_func_info */ @@ -6988,11 +6830,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, subprog); return -EFAULT; } - tname = btf_name_by_offset(btf, t->name_off); - - if (log->level & BPF_LOG_LEVEL) - bpf_log(log, "Validating %s() func#%d...\n", - tname, subprog); + tname = btf_name_by_offset(btf, fn_t->name_off); if (prog->aux->func_info_aux[subprog].unreliable) { bpf_log(log, "Verifier bug in function %s()\n", tname); @@ -7001,7 +6839,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, if (prog_type == BPF_PROG_TYPE_EXT) prog_type = prog->aux->dst_prog->type; - t = btf_type_by_id(btf, t->type); + t = btf_type_by_id(btf, fn_t->type); if (!t || !btf_type_is_func_proto(t)) { bpf_log(log, "Invalid type of function %s()\n", tname); return -EFAULT; @@ -7013,7 +6851,6 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; } - *arg_cnt = nargs; /* check that function returns int, exception cb also requires this */ t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) @@ -7028,24 +6865,54 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, * Only PTR_TO_CTX and SCALAR are supported atm. */ for (i = 0; i < nargs; i++) { - struct bpf_reg_state *reg = ®s[i + 1]; + bool is_nonnull = false; + const char *tag; t = btf_type_by_id(btf, args[i].type); + + tag = btf_find_decl_tag_value(btf, fn_t, i, "arg:"); + if (IS_ERR(tag) && PTR_ERR(tag) == -ENOENT) { + tag = NULL; + } else if (IS_ERR(tag)) { + bpf_log(log, "arg#%d type's tag fetching failure: %ld\n", i, PTR_ERR(tag)); + return PTR_ERR(tag); + } + /* 'arg:<tag>' decl_tag takes precedence over derivation of + * register type from BTF type itself + */ + if (tag) { + /* disallow arg tags in static subprogs */ + if (!is_global) { + bpf_log(log, "arg#%d type tag is not supported in static functions\n", i); + return -EOPNOTSUPP; + } + if (strcmp(tag, "ctx") == 0) { + sub->args[i].arg_type = ARG_PTR_TO_CTX; + continue; + } + if (strcmp(tag, "nonnull") == 0) + is_nonnull = true; + } + while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (btf_type_is_int(t) || btf_is_any_enum(t)) { - reg->type = SCALAR_VALUE; + sub->args[i].arg_type = ARG_ANYTHING; continue; } - if (btf_type_is_ptr(t)) { - if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { - reg->type = PTR_TO_CTX; - continue; - } + if (btf_type_is_ptr(t) && btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + sub->args[i].arg_type = ARG_PTR_TO_CTX; + continue; + } + if (btf_type_is_ptr(t) && btf_is_dynptr_ptr(btf, t)) { + sub->args[i].arg_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY; + continue; + } + if (is_global && btf_type_is_ptr(t)) { + u32 mem_size; t = btf_type_skip_modifiers(btf, t->type, NULL); - - ref_t = btf_resolve_size(btf, t, ®->mem_size); + ref_t = btf_resolve_size(btf, t, &mem_size); if (IS_ERR(ref_t)) { bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", @@ -7054,15 +6921,22 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, return -EINVAL; } - reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; - reg->id = ++env->id_gen; - + sub->args[i].arg_type = is_nonnull ? ARG_PTR_TO_MEM : ARG_PTR_TO_MEM_OR_NULL; + sub->args[i].mem_size = mem_size; continue; } + if (is_nonnull) { + bpf_log(log, "arg#%d marked as non-null, but is not a pointer type\n", i); + return -EINVAL; + } bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n", i, btf_type_str(t), tname); return -EINVAL; } + + sub->arg_cnt = nargs; + sub->args_cached = true; + return 0; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 98e0e3835b28..491d20038cbe 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1630,7 +1630,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -2191,7 +2191,7 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -2348,7 +2348,7 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 14ace23d517b..ea6843be2616 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -682,7 +682,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || - !bpf_token_capable(fp->aux->token, CAP_BPF)) + !bpf_capable()) return; bpf_prog_ksym_set_addr(fp); @@ -2779,7 +2779,6 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); - bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 07fd4b5704f3..be72824f32b2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1679,7 +1679,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_task_pt_regs_proto __weak; const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_base_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1730,7 +1730,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) break; } - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return NULL; switch (func_id) { @@ -1788,7 +1788,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) break; } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; switch (func_id) { diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 4383b3d13a55..41e0a55c35f5 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -20,7 +20,6 @@ #include <linux/filter.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> -#include <linux/kstrtox.h> #include "preload/bpf_preload.h" enum bpf_type { @@ -99,9 +98,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +static struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -595,145 +594,13 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -struct bpffs_btf_enums { - const struct btf *btf; - const struct btf_type *cmd_t; - const struct btf_type *map_t; - const struct btf_type *prog_t; - const struct btf_type *attach_t; -}; - -static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) -{ - const struct btf *btf; - const struct btf_type *t; - const char *name; - int i, n; - - memset(info, 0, sizeof(*info)); - - btf = bpf_get_btf_vmlinux(); - if (IS_ERR(btf)) - return PTR_ERR(btf); - if (!btf) - return -ENOENT; - - info->btf = btf; - - for (i = 1, n = btf_nr_types(btf); i < n; i++) { - t = btf_type_by_id(btf, i); - if (!btf_type_is_enum(t)) - continue; - - name = btf_name_by_offset(btf, t->name_off); - if (!name) - continue; - - if (strcmp(name, "bpf_cmd") == 0) - info->cmd_t = t; - else if (strcmp(name, "bpf_map_type") == 0) - info->map_t = t; - else if (strcmp(name, "bpf_prog_type") == 0) - info->prog_t = t; - else if (strcmp(name, "bpf_attach_type") == 0) - info->attach_t = t; - else - continue; - - if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) - return 0; - } - - return -ESRCH; -} - -static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, - const char *prefix, const char *str, int *value) -{ - const struct btf_enum *e; - const char *name; - int i, n, pfx_len = strlen(prefix); - - *value = 0; - - if (!btf || !enum_t) - return false; - - for (i = 0, n = btf_vlen(enum_t); i < n; i++) { - e = &btf_enum(enum_t)[i]; - - name = btf_name_by_offset(btf, e->name_off); - if (!name || strncasecmp(name, prefix, pfx_len) != 0) - continue; - - /* match symbolic name case insensitive and ignoring prefix */ - if (strcasecmp(name + pfx_len, str) == 0) { - *value = e->val; - return true; - } - } - - return false; -} - -static void seq_print_delegate_opts(struct seq_file *m, - const char *opt_name, - const struct btf *btf, - const struct btf_type *enum_t, - const char *prefix, - u64 delegate_msk, u64 any_msk) -{ - const struct btf_enum *e; - bool first = true; - const char *name; - u64 msk; - int i, n, pfx_len = strlen(prefix); - - delegate_msk &= any_msk; /* clear unknown bits */ - - if (delegate_msk == 0) - return; - - seq_printf(m, ",%s", opt_name); - if (delegate_msk == any_msk) { - seq_printf(m, "=any"); - return; - } - - if (btf && enum_t) { - for (i = 0, n = btf_vlen(enum_t); i < n; i++) { - e = &btf_enum(enum_t)[i]; - name = btf_name_by_offset(btf, e->name_off); - if (!name || strncasecmp(name, prefix, pfx_len) != 0) - continue; - msk = 1ULL << e->val; - if (delegate_msk & msk) { - /* emit lower-case name without prefix */ - seq_printf(m, "%c", first ? '=' : ':'); - name += pfx_len; - while (*name) { - seq_printf(m, "%c", tolower(*name)); - name++; - } - - delegate_msk &= ~msk; - first = false; - } - } - } - if (delegate_msk) - seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); -} - /* * Display the mount options in /proc/mounts. */ static int bpf_show_options(struct seq_file *m, struct dentry *root) { - struct bpf_mount_opts *opts = root->d_sb->s_fs_info; struct inode *inode = d_inode(root); umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; - u64 mask; if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", @@ -743,35 +610,6 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) from_kgid_munged(&init_user_ns, inode->i_gid)); if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); - - if (opts->delegate_cmds || opts->delegate_maps || - opts->delegate_progs || opts->delegate_attachs) { - struct bpffs_btf_enums info; - - /* ignore errors, fallback to hex */ - (void)find_bpffs_btf_enums(&info); - - mask = (1ULL << __MAX_BPF_CMD) - 1; - seq_print_delegate_opts(m, "delegate_cmds", - info.btf, info.cmd_t, "BPF_", - opts->delegate_cmds, mask); - - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_maps", - info.btf, info.map_t, "BPF_MAP_TYPE_", - opts->delegate_maps, mask); - - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_progs", - info.btf, info.prog_t, "BPF_PROG_TYPE_", - opts->delegate_progs, mask); - - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_attachs", - info.btf, info.attach_t, "BPF_", - opts->delegate_attachs, mask); - } - return 0; } @@ -786,7 +624,7 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -const struct super_operations bpf_super_ops = { +static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, @@ -797,30 +635,28 @@ enum { OPT_UID, OPT_GID, OPT_MODE, - OPT_DELEGATE_CMDS, - OPT_DELEGATE_MAPS, - OPT_DELEGATE_PROGS, - OPT_DELEGATE_ATTACHS, }; static const struct fs_parameter_spec bpf_fs_parameters[] = { fsparam_u32 ("uid", OPT_UID), fsparam_u32 ("gid", OPT_GID), fsparam_u32oct ("mode", OPT_MODE), - fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), - fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), - fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), - fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), {} }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; +}; + static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct bpf_mount_opts *opts = fc->s_fs_info; + struct bpf_mount_opts *opts = fc->fs_private; struct fs_parse_result result; kuid_t uid; kgid_t gid; - int opt, err; + int opt; opt = fs_parse(fc, bpf_fs_parameters, param, &result); if (opt < 0) { @@ -872,71 +708,9 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) case OPT_MODE: opts->mode = result.uint_32 & S_IALLUGO; break; - case OPT_DELEGATE_CMDS: - case OPT_DELEGATE_MAPS: - case OPT_DELEGATE_PROGS: - case OPT_DELEGATE_ATTACHS: { - struct bpffs_btf_enums info; - const struct btf_type *enum_t; - const char *enum_pfx; - u64 *delegate_msk, msk = 0; - char *p; - int val; - - /* ignore errors, fallback to hex */ - (void)find_bpffs_btf_enums(&info); - - switch (opt) { - case OPT_DELEGATE_CMDS: - delegate_msk = &opts->delegate_cmds; - enum_t = info.cmd_t; - enum_pfx = "BPF_"; - break; - case OPT_DELEGATE_MAPS: - delegate_msk = &opts->delegate_maps; - enum_t = info.map_t; - enum_pfx = "BPF_MAP_TYPE_"; - break; - case OPT_DELEGATE_PROGS: - delegate_msk = &opts->delegate_progs; - enum_t = info.prog_t; - enum_pfx = "BPF_PROG_TYPE_"; - break; - case OPT_DELEGATE_ATTACHS: - delegate_msk = &opts->delegate_attachs; - enum_t = info.attach_t; - enum_pfx = "BPF_"; - break; - default: - return -EINVAL; - } - - while ((p = strsep(¶m->string, ":"))) { - if (strcmp(p, "any") == 0) { - msk |= ~0ULL; - } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { - msk |= 1ULL << val; - } else { - err = kstrtou64(p, 0, &msk); - if (err) - return err; - } - } - - /* Setting delegation mount options requires privileges */ - if (msk && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - *delegate_msk |= msk; - break; - } - default: - /* ignore unknown mount options */ - break; } return 0; - bad_value: return invalfc(fc, "Bad value for '%s'", param->key); } @@ -1010,14 +784,10 @@ out: static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; - struct bpf_mount_opts *opts = sb->s_fs_info; + struct bpf_mount_opts *opts = fc->fs_private; struct inode *inode; int ret; - /* Mounting an instance of BPF FS requires privileges */ - if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) - return -EPERM; - ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); if (ret) return ret; @@ -1041,7 +811,7 @@ static int bpf_get_tree(struct fs_context *fc) static void bpf_free_fc(struct fs_context *fc) { - kfree(fc->s_fs_info); + kfree(fc->fs_private); } static const struct fs_context_operations bpf_context_ops = { @@ -1065,32 +835,17 @@ static int bpf_init_fs_context(struct fs_context *fc) opts->uid = current_fsuid(); opts->gid = current_fsgid(); - /* start out with no BPF token delegation enabled */ - opts->delegate_cmds = 0; - opts->delegate_maps = 0; - opts->delegate_progs = 0; - opts->delegate_attachs = 0; - - fc->s_fs_info = opts; + fc->fs_private = opts; fc->ops = &bpf_context_ops; return 0; } -static void bpf_kill_super(struct super_block *sb) -{ - struct bpf_mount_opts *opts = sb->s_fs_info; - - kill_litter_super(sb); - kfree(opts); -} - static struct file_system_type bpf_fs_type = { .owner = THIS_MODULE, .name = "bpf", .init_fs_context = bpf_init_fs_context, .parameters = bpf_fs_parameters, - .kill_sb = bpf_kill_super, - .fs_flags = FS_USERNS_MOUNT, + .kill_sb = kill_litter_super, }; static int __init bpf_init(void) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 6a51cfe4c2d6..aa0fbf000a12 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -490,27 +490,6 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); } -static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) -{ - struct llist_node *first; - unsigned int obj_size; - - first = c->free_llist.first; - if (!first) - return 0; - - if (c->percpu_size) - obj_size = pcpu_alloc_size(((void **)first)[1]); - else - obj_size = ksize(first); - if (obj_size != c->unit_size) { - WARN_ONCE(1, "bpf_mem_cache[%u]: percpu %d, unexpected object size %u, expect %u\n", - idx, c->percpu_size, obj_size, c->unit_size); - return -EINVAL; - } - return 0; -} - /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. * @@ -521,10 +500,10 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; - int cpu, i, err, unit_size, percpu_size = 0; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; + int cpu, i, unit_size, percpu_size = 0; /* room for llist_node and per-cpu pointer */ if (percpu) @@ -560,7 +539,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; - err = 0; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif @@ -574,28 +552,12 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c->tgt = c; init_refill_work(c); - /* Another bpf_mem_cache will be used when allocating - * c->unit_size in bpf_mem_alloc(), so doesn't prefill - * for the bpf_mem_cache because these free objects will - * never be used. - */ - if (i != bpf_mem_cache_idx(c->unit_size)) - continue; prefill_mem_cache(c, cpu); - err = check_obj_size(c, i); - if (err) - goto out; } } -out: ma->caches = pcc; - /* refill_work is either zeroed or initialized, so it is safe to - * call irq_work_sync(). - */ - if (err) - bpf_mem_alloc_destroy(ma); - return err; + return 0; } static void drain_mem_cache(struct bpf_mem_cache *c) @@ -869,7 +831,7 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) void *ret; if (!size) - return ZERO_SIZE_PTR; + return NULL; idx = bpf_mem_cache_idx(size + LLIST_NODE_SZ); if (idx < 0) @@ -879,26 +841,17 @@ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) return !ret ? NULL : ret + LLIST_NODE_SZ; } -static notrace int bpf_mem_free_idx(void *ptr, bool percpu) -{ - size_t size; - - if (percpu) - size = pcpu_alloc_size(*((void **)ptr)); - else - size = ksize(ptr - LLIST_NODE_SZ); - return bpf_mem_cache_idx(size); -} - void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) { + struct bpf_mem_cache *c; int idx; if (!ptr) return; - idx = bpf_mem_free_idx(ptr, ma->percpu); - if (idx < 0) + c = *(void **)(ptr - LLIST_NODE_SZ); + idx = bpf_mem_cache_idx(c->unit_size); + if (WARN_ON_ONCE(idx < 0)) return; unit_free(this_cpu_ptr(ma->caches)->cache + idx, ptr); @@ -906,13 +859,15 @@ void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr) { + struct bpf_mem_cache *c; int idx; if (!ptr) return; - idx = bpf_mem_free_idx(ptr, ma->percpu); - if (idx < 0) + c = *(void **)(ptr - LLIST_NODE_SZ); + idx = bpf_mem_cache_idx(c->unit_size); + if (WARN_ON_ONCE(idx < 0)) return; unit_free_rcu(this_cpu_ptr(ma->caches)->cache + idx, ptr); @@ -986,41 +941,3 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } - -/* The alignment of dynamic per-cpu area is 8, so c->unit_size and the - * actual size of dynamic per-cpu area will always be matched and there is - * no need to adjust size_index for per-cpu allocation. However for the - * simplicity of the implementation, use an unified size_index for both - * kmalloc and per-cpu allocation. - */ -static __init int bpf_mem_cache_adjust_size(void) -{ - unsigned int size; - - /* Adjusting the indexes in size_index() according to the object_size - * of underlying slab cache, so bpf_mem_alloc() will select a - * bpf_mem_cache with unit_size equal to the object_size of - * the underlying slab cache. - * - * The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is - * 256-bytes, so only do adjustment for [8-bytes, 192-bytes]. - */ - for (size = 192; size >= 8; size -= 8) { - unsigned int kmalloc_size, index; - - kmalloc_size = kmalloc_size_roundup(size); - if (kmalloc_size == size) - continue; - - if (kmalloc_size <= 192) - index = size_index[(kmalloc_size - 1) / 8]; - else - index = fls(kmalloc_size - 1) - 1; - /* Only overwrite if necessary */ - if (size_index[(size - 1) / 8] != index) - size_index[(size - 1) / 8] = index; - } - - return 0; -} -subsys_initcall(bpf_mem_cache_adjust_size); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8faa1a20edf8..1bf9805ee185 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1011,8 +1011,8 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(struct bpf_map *map, struct bpf_token *token, - const struct btf *btf, u32 btf_key_id, u32 btf_value_id) +static int map_check_btf(struct bpf_map *map, const struct btf *btf, + u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; u32 key_size, value_size; @@ -1040,7 +1040,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, if (!IS_ERR_OR_NULL(map->record)) { int i; - if (!bpf_token_capable(token, CAP_BPF)) { + if (!bpf_capable()) { ret = -EPERM; goto free_map_tab; } @@ -1123,17 +1123,11 @@ free_map_tab: return ret; } -static bool bpf_net_capable(void) -{ - return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); -} - -#define BPF_MAP_CREATE_LAST_FIELD map_token_fd +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { const struct bpf_map_ops *ops; - struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 map_type = attr->map_type; struct bpf_map *map; @@ -1184,32 +1178,14 @@ static int map_create(union bpf_attr *attr) if (!ops->map_mem_usage) return -EINVAL; - if (attr->map_token_fd) { - token = bpf_token_get_from_fd(attr->map_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - - /* if current token doesn't grant map creation permissions, - * then we can't use this token, so ignore it and rely on - * system-wide capabilities checks - */ - if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || - !bpf_token_allow_map_type(token, attr->map_type)) { - bpf_token_put(token); - token = NULL; - } - } - - err = -EPERM; - /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on * object creation success. Even with unprivileged BPF disabled, * capability checks are still carried out. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) - goto put_token; + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; /* check privileged map type permissions */ switch (map_type) { @@ -1242,27 +1218,25 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: - if (!bpf_token_capable(token, CAP_BPF)) - goto put_token; + if (!bpf_capable()) + return -EPERM; break; case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_XSKMAP: - if (!bpf_token_capable(token, CAP_NET_ADMIN)) - goto put_token; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; break; default: WARN(1, "unsupported map type %d", map_type); - goto put_token; + return -EPERM; } map = ops->map_alloc(attr); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto put_token; - } + if (IS_ERR(map)) + return PTR_ERR(map); map->ops = ops; map->map_type = map_type; @@ -1299,7 +1273,7 @@ static int map_create(union bpf_attr *attr) map->btf = btf; if (attr->btf_value_type_id) { - err = map_check_btf(map, token, btf, attr->btf_key_type_id, + err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) goto free_map; @@ -1311,16 +1285,15 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - err = security_bpf_map_create(map, attr, token); + err = security_bpf_map_alloc(map); if (err) - goto free_map_sec; + goto free_map; err = bpf_map_alloc_id(map); if (err) goto free_map_sec; bpf_map_save_memcg(map); - bpf_token_put(token); err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -1341,8 +1314,6 @@ free_map_sec: free_map: btf_put(map->btf); map->ops->map_free(map); -put_token: - bpf_token_put(token); return err; } @@ -2173,7 +2144,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) kvfree(aux->func_info); kfree(aux->func_info_aux); free_uid(aux->user); - security_bpf_prog_free(aux->prog); + security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } @@ -2619,15 +2590,13 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd +#define BPF_PROG_LOAD_LAST_FIELD log_true_size static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; struct btf *attach_btf = NULL; - struct bpf_token *token = NULL; - bool bpf_cap; int err; char license[128]; @@ -2644,31 +2613,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; - bpf_prog_load_fixup_attach_type(attr); - - if (attr->prog_token_fd) { - token = bpf_token_get_from_fd(attr->prog_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - /* if current token doesn't grant prog loading permissions, - * then we can't use this token, so ignore it and rely on - * system-wide capabilities checks - */ - if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || - !bpf_token_allow_prog_type(token, attr->prog_type, - attr->expected_attach_type)) { - bpf_token_put(token); - token = NULL; - } - } - - bpf_cap = bpf_token_capable(token, CAP_BPF); - err = -EPERM; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && - !bpf_cap) - goto put_token; + !bpf_capable()) + return -EPERM; /* Intent here is for unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend @@ -2677,23 +2625,21 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) * capability checks are still carried out for these * and other operations. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_cap) - goto put_token; + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; if (attr->insn_cnt == 0 || - attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { - err = -E2BIG; - goto put_token; - } + attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) + return -E2BIG; if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && - !bpf_cap) - goto put_token; + !bpf_capable()) + return -EPERM; - if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) - goto put_token; - if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) - goto put_token; + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + if (is_perfmon_prog_type(type) && !perfmon_capable()) + return -EPERM; /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is @@ -2703,33 +2649,27 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (IS_ERR(dst_prog)) { dst_prog = NULL; attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); - if (IS_ERR(attach_btf)) { - err = -EINVAL; - goto put_token; - } + if (IS_ERR(attach_btf)) + return -EINVAL; if (!btf_is_kernel(attach_btf)) { /* attaching through specifying bpf_prog's BTF * objects directly might be supported eventually */ btf_put(attach_btf); - err = -ENOTSUPP; - goto put_token; + return -ENOTSUPP; } } } else if (attr->attach_btf_id) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(attach_btf)) { - err = PTR_ERR(attach_btf); - goto put_token; - } - if (!attach_btf) { - err = -EINVAL; - goto put_token; - } + if (IS_ERR(attach_btf)) + return PTR_ERR(attach_btf); + if (!attach_btf) + return -EINVAL; btf_get(attach_btf); } + bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, dst_prog)) { @@ -2737,8 +2677,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - err = -EINVAL; - goto put_token; + return -EINVAL; } /* plain bpf_prog allocation */ @@ -2748,8 +2687,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - err = -EINVAL; - goto put_token; + return -ENOMEM; } prog->expected_attach_type = attr->expected_attach_type; @@ -2760,9 +2698,9 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; - /* move token into prog->aux, reuse taken refcnt */ - prog->aux->token = token; - token = NULL; + err = security_bpf_prog_alloc(prog->aux); + if (err) + goto free_prog; prog->aux->user = get_current_user(); prog->len = attr->insn_cnt; @@ -2771,12 +2709,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (copy_from_bpfptr(prog->insns, make_bpfptr(attr->insns, uattr.is_kernel), bpf_prog_insn_size(prog)) != 0) - goto free_prog; + goto free_prog_sec; /* copy eBPF program license from user space */ if (strncpy_from_bpfptr(license, make_bpfptr(attr->license, uattr.is_kernel), sizeof(license) - 1) < 0) - goto free_prog; + goto free_prog_sec; license[sizeof(license) - 1] = 0; /* eBPF programs must be GPL compatible to use GPL-ed functions */ @@ -2790,29 +2728,25 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_dev_bound_init(prog, attr); if (err) - goto free_prog; + goto free_prog_sec; } if (type == BPF_PROG_TYPE_EXT && dst_prog && bpf_prog_is_dev_bound(dst_prog->aux)) { err = bpf_prog_dev_bound_inherit(prog, dst_prog); if (err) - goto free_prog; + goto free_prog_sec; } /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) - goto free_prog; + goto free_prog_sec; prog->aux->load_time = ktime_get_boottime_ns(); err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name)); if (err < 0) - goto free_prog; - - err = security_bpf_prog_load(prog, attr, token); - if (err) goto free_prog_sec; /* run eBPF verifier */ @@ -2858,16 +2792,13 @@ free_used_maps: */ __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); return err; - free_prog_sec: - security_bpf_prog_free(prog); -free_prog: free_uid(prog->aux->user); + security_bpf_prog_free(prog->aux); +free_prog: if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); bpf_prog_free(prog); -put_token: - bpf_token_put(token); return err; } @@ -3857,7 +3788,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_SK_LOOKUP: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: - if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) /* cg-skb progs can be loaded by unpriv user. * check permissions at attach time. */ @@ -4060,7 +3991,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { - if (!bpf_net_capable()) + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (CHECK_ATTR(BPF_PROG_QUERY)) return -EINVAL; @@ -4828,31 +4759,15 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd +#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { - struct bpf_token *token = NULL; - if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; - if (attr->btf_token_fd) { - token = bpf_token_get_from_fd(attr->btf_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { - bpf_token_put(token); - token = NULL; - } - } - - if (!bpf_token_capable(token, CAP_BPF)) { - bpf_token_put(token); + if (!bpf_capable()) return -EPERM; - } - - bpf_token_put(token); return btf_new_fd(attr, uattr, uattr_size); } @@ -5470,20 +5385,6 @@ out_prog_put: return ret; } -#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd - -static int token_create(union bpf_attr *attr) -{ - if (CHECK_ATTR(BPF_TOKEN_CREATE)) - return -EINVAL; - - /* no flags are supported yet */ - if (attr->token_create.flags) - return -EINVAL; - - return bpf_token_create(attr); -} - static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -5617,9 +5518,6 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) case BPF_PROG_BIND_MAP: err = bpf_prog_bind_map(&attr); break; - case BPF_TOKEN_CREATE: - err = token_create(&attr); - break; default: err = -EINVAL; break; @@ -5726,7 +5624,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { const struct bpf_func_proto * __weak tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } BPF_CALL_1(bpf_sys_close, u32, fd) @@ -5776,8 +5674,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sys_bpf: - return !bpf_token_capable(prog->aux->token, CAP_PERFMON) - ? NULL : &bpf_sys_bpf_proto; + return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; case BPF_FUNC_btf_find_by_name_kind: return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c deleted file mode 100644 index a86fccd57e2d..000000000000 --- a/kernel/bpf/token.c +++ /dev/null @@ -1,271 +0,0 @@ -#include <linux/bpf.h> -#include <linux/vmalloc.h> -#include <linux/fdtable.h> -#include <linux/file.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/idr.h> -#include <linux/namei.h> -#include <linux/user_namespace.h> -#include <linux/security.h> - -bool bpf_token_capable(const struct bpf_token *token, int cap) -{ - /* BPF token allows ns_capable() level of capabilities, but only if - * token's userns is *exactly* the same as current user's userns - */ - if (token && current_user_ns() == token->userns) { - if (ns_capable(token->userns, cap) || - (cap != CAP_SYS_ADMIN && ns_capable(token->userns, CAP_SYS_ADMIN))) - return security_bpf_token_capable(token, cap) == 0; - } - /* otherwise fallback to capable() checks */ - return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); -} - -void bpf_token_inc(struct bpf_token *token) -{ - atomic64_inc(&token->refcnt); -} - -static void bpf_token_free(struct bpf_token *token) -{ - security_bpf_token_free(token); - put_user_ns(token->userns); - kvfree(token); -} - -static void bpf_token_put_deferred(struct work_struct *work) -{ - struct bpf_token *token = container_of(work, struct bpf_token, work); - - bpf_token_free(token); -} - -void bpf_token_put(struct bpf_token *token) -{ - if (!token) - return; - - if (!atomic64_dec_and_test(&token->refcnt)) - return; - - INIT_WORK(&token->work, bpf_token_put_deferred); - schedule_work(&token->work); -} - -static int bpf_token_release(struct inode *inode, struct file *filp) -{ - struct bpf_token *token = filp->private_data; - - bpf_token_put(token); - return 0; -} - -static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) -{ - struct bpf_token *token = filp->private_data; - u64 mask; - - BUILD_BUG_ON(__MAX_BPF_CMD >= 64); - mask = (1ULL << __MAX_BPF_CMD) - 1; - if ((token->allowed_cmds & mask) == mask) - seq_printf(m, "allowed_cmds:\tany\n"); - else - seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); - - BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; - if ((token->allowed_maps & mask) == mask) - seq_printf(m, "allowed_maps:\tany\n"); - else - seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); - - BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; - if ((token->allowed_progs & mask) == mask) - seq_printf(m, "allowed_progs:\tany\n"); - else - seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); - - BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; - if ((token->allowed_attachs & mask) == mask) - seq_printf(m, "allowed_attachs:\tany\n"); - else - seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); -} - -#define BPF_TOKEN_INODE_NAME "bpf-token" - -static const struct inode_operations bpf_token_iops = { }; - -static const struct file_operations bpf_token_fops = { - .release = bpf_token_release, - .show_fdinfo = bpf_token_show_fdinfo, -}; - -int bpf_token_create(union bpf_attr *attr) -{ - struct bpf_mount_opts *mnt_opts; - struct bpf_token *token = NULL; - struct user_namespace *userns; - struct inode *inode; - struct file *file; - struct path path; - struct fd f; - umode_t mode; - int err, fd; - - f = fdget(attr->token_create.bpffs_fd); - if (!f.file) - return -EBADF; - - path = f.file->f_path; - path_get(&path); - fdput(f); - - if (path.dentry != path.mnt->mnt_sb->s_root) { - err = -EINVAL; - goto out_path; - } - if (path.mnt->mnt_sb->s_op != &bpf_super_ops) { - err = -EINVAL; - goto out_path; - } - err = path_permission(&path, MAY_ACCESS); - if (err) - goto out_path; - - userns = path.dentry->d_sb->s_user_ns; - /* - * Enforce that creators of BPF tokens are in the same user - * namespace as the BPF FS instance. This makes reasoning about - * permissions a lot easier and we can always relax this later. - */ - if (current_user_ns() != userns) { - err = -EPERM; - goto out_path; - } - if (!ns_capable(userns, CAP_BPF)) { - err = -EPERM; - goto out_path; - } - - mnt_opts = path.dentry->d_sb->s_fs_info; - if (mnt_opts->delegate_cmds == 0 && - mnt_opts->delegate_maps == 0 && - mnt_opts->delegate_progs == 0 && - mnt_opts->delegate_attachs == 0) { - err = -ENOENT; /* no BPF token delegation is set up */ - goto out_path; - } - - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out_path; - } - - inode->i_op = &bpf_token_iops; - inode->i_fop = &bpf_token_fops; - clear_nlink(inode); /* make sure it is unlinked */ - - file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); - if (IS_ERR(file)) { - iput(inode); - err = PTR_ERR(file); - goto out_path; - } - - token = kvzalloc(sizeof(*token), GFP_USER); - if (!token) { - err = -ENOMEM; - goto out_file; - } - - atomic64_set(&token->refcnt, 1); - - /* remember bpffs owning userns for future ns_capable() checks */ - token->userns = get_user_ns(userns); - - token->allowed_cmds = mnt_opts->delegate_cmds; - token->allowed_maps = mnt_opts->delegate_maps; - token->allowed_progs = mnt_opts->delegate_progs; - token->allowed_attachs = mnt_opts->delegate_attachs; - - err = security_bpf_token_create(token, attr, &path); - if (err) - goto out_token; - - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - err = fd; - goto out_token; - } - - file->private_data = token; - fd_install(fd, file); - - path_put(&path); - return fd; - -out_token: - bpf_token_free(token); -out_file: - fput(file); -out_path: - path_put(&path); - return err; -} - -struct bpf_token *bpf_token_get_from_fd(u32 ufd) -{ - struct fd f = fdget(ufd); - struct bpf_token *token; - - if (!f.file) - return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_token_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - token = f.file->private_data; - bpf_token_inc(token); - fdput(f); - - return token; -} - -bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) -{ - /* BPF token can be used only within exactly the same userns in which - * it was created - */ - if (!token || current_user_ns() != token->userns) - return false; - if (!(token->allowed_cmds & (1ULL << cmd))) - return false; - return security_bpf_token_cmd(token, cmd) == 0; -} - -bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) -{ - if (!token || type >= __MAX_BPF_MAP_TYPE) - return false; - - return token->allowed_maps & (1ULL << type); -} - -bool bpf_token_allow_prog_type(const struct bpf_token *token, - enum bpf_prog_type prog_type, - enum bpf_attach_type attach_type) -{ - if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) - return false; - - return (token->allowed_progs & (1ULL << prog_type)) && - (token->allowed_attachs & (1ULL << attach_type)); -} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9456ee0ad129..a376eb609c41 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -437,16 +437,6 @@ static const char *subprog_name(const struct bpf_verifier_env *env, int subprog) return btf_type_name(env->prog->aux->btf, info->type_id); } -static struct bpf_func_info_aux *subprog_aux(const struct bpf_verifier_env *env, int subprog) -{ - return &env->prog->aux->func_info_aux[subprog]; -} - -static struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env, int subprog) -{ - return &env->subprog_info[subprog]; -} - static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog) { struct bpf_subprog_info *info = subprog_info(env, subprog); @@ -5137,8 +5127,8 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env, return 0; } -int check_ptr_off_reg(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno) +static int check_ptr_off_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) { return __check_ptr_off_reg(env, reg, regno, false); } @@ -7310,8 +7300,8 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, return err; } -int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno, u32 mem_size) +static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + u32 regno, u32 mem_size) { bool may_be_null = type_may_be_null(reg->type); struct bpf_reg_state saved_reg; @@ -8296,9 +8286,9 @@ reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) return field; } -int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, int regno, - enum bpf_arg_type arg_type) +static int check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno, + enum bpf_arg_type arg_type) { u32 type = reg->type; @@ -9259,6 +9249,102 @@ err_out: return err; } +static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, + const struct btf *btf, + struct bpf_reg_state *regs) +{ + struct bpf_subprog_info *sub = subprog_info(env, subprog); + struct bpf_verifier_log *log = &env->log; + u32 i; + int ret; + + ret = btf_prepare_func_args(env, subprog); + if (ret) + return ret; + + /* check that BTF function arguments match actual types that the + * verifier sees. + */ + for (i = 0; i < sub->arg_cnt; i++) { + u32 regno = i + 1; + struct bpf_reg_state *reg = ®s[regno]; + struct bpf_subprog_arg_info *arg = &sub->args[i]; + + if (arg->arg_type == ARG_ANYTHING) { + if (reg->type != SCALAR_VALUE) { + bpf_log(log, "R%d is not a scalar\n", regno); + return -EINVAL; + } + } else if (arg->arg_type == ARG_PTR_TO_CTX) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + if (ret < 0) + return ret; + /* If function expects ctx type in BTF check that caller + * is passing PTR_TO_CTX. + */ + if (reg->type != PTR_TO_CTX) { + bpf_log(log, "arg#%d expects pointer to ctx\n", i); + return -EINVAL; + } + } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + if (ret < 0) + return ret; + if (check_mem_reg(env, reg, regno, arg->mem_size)) + return -EINVAL; + if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) { + bpf_log(log, "arg#%d is expected to be non-NULL\n", i); + return -EINVAL; + } + } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { + ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0); + if (ret) + return ret; + } else { + bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n", + i, arg->arg_type); + return -EFAULT; + } + } + + return 0; +} + +/* Compare BTF of a function call with given bpf_reg_state. + * Returns: + * EFAULT - there is a verifier bug. Abort verification. + * EINVAL - there is a type mismatch or BTF is not available. + * 0 - BTF matches with what bpf_reg_state expects. + * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. + */ +static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs) +{ + struct bpf_prog *prog = env->prog; + struct btf *btf = prog->aux->btf; + u32 btf_id; + int err; + + if (!prog->aux->func_info) + return -EINVAL; + + btf_id = prog->aux->func_info[subprog].type_id; + if (!btf_id) + return -EFAULT; + + if (prog->aux->func_info_aux[subprog].unreliable) + return -EINVAL; + + err = btf_check_func_arg_match(env, subprog, btf, regs); + /* Compiler optimizations can remove arguments from static functions + * or mismatched type can be passed into a global function. + * In such cases mark the function as unreliable from BTF point of view. + */ + if (err) + prog->aux->func_info_aux[subprog].unreliable = true; + return err; +} + static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int insn_idx, int subprog, set_callee_state_fn set_callee_state_cb) @@ -9530,7 +9616,7 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env, callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; __mark_reg_known_zero(&callee->regs[BPF_REG_2]); callee->regs[BPF_REG_2].btf = btf_vmlinux; - callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA], + callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA]; /* pointer to stack or null */ callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; @@ -14336,7 +14422,43 @@ again: } break; case BPF_JNE: - /* we don't derive any new information for inequality yet */ + if (!is_reg_const(reg2, is_jmp32)) + swap(reg1, reg2); + if (!is_reg_const(reg2, is_jmp32)) + break; + + /* try to recompute the bound of reg1 if reg2 is a const and + * is exactly the edge of reg1. + */ + val = reg_const_value(reg2, is_jmp32); + if (is_jmp32) { + /* u32_min_value is not equal to 0xffffffff at this point, + * because otherwise u32_max_value is 0xffffffff as well, + * in such a case both reg1 and reg2 would be constants, + * jump would be predicted and reg_set_min_max() won't + * be called. + * + * Same reasoning works for all {u,s}{min,max}{32,64} cases + * below. + */ + if (reg1->u32_min_value == (u32)val) + reg1->u32_min_value++; + if (reg1->u32_max_value == (u32)val) + reg1->u32_max_value--; + if (reg1->s32_min_value == (s32)val) + reg1->s32_min_value++; + if (reg1->s32_max_value == (s32)val) + reg1->s32_max_value--; + } else { + if (reg1->umin_value == (u64)val) + reg1->umin_value++; + if (reg1->umax_value == (u64)val) + reg1->umax_value--; + if (reg1->smin_value == (s64)val) + reg1->smin_value++; + if (reg1->smax_value == (s64)val) + reg1->smax_value--; + } break; case BPF_JSET: if (!is_reg_const(reg2, is_jmp32)) @@ -19873,6 +19995,7 @@ static void free_states(struct bpf_verifier_env *env) static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_state *state; struct bpf_reg_state *regs; int ret, i; @@ -19899,54 +20022,71 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) state->first_insn_idx = env->subprog_info[subprog].start; state->last_insn_idx = -1; + regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { - u32 nargs; + const char *sub_name = subprog_name(env, subprog); + struct bpf_subprog_arg_info *arg; + struct bpf_reg_state *reg; - ret = btf_prepare_func_args(env, subprog, regs, &nargs); + verbose(env, "Validating %s() func#%d...\n", sub_name, subprog); + ret = btf_prepare_func_args(env, subprog); if (ret) goto out; + if (subprog_is_exc_cb(env, subprog)) { state->frame[0]->in_exception_callback_fn = true; /* We have already ensured that the callback returns an integer, just * like all global subprogs. We need to determine it only has a single * scalar argument. */ - if (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE) { + if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) { verbose(env, "exception cb only supports single integer argument\n"); ret = -EINVAL; goto out; } } - for (i = BPF_REG_1; i <= BPF_REG_5; i++) { - if (regs[i].type == PTR_TO_CTX) + for (i = BPF_REG_1; i <= sub->arg_cnt; i++) { + arg = &sub->args[i - BPF_REG_1]; + reg = ®s[i]; + + if (arg->arg_type == ARG_PTR_TO_CTX) { + reg->type = PTR_TO_CTX; mark_reg_known_zero(env, regs, i); - else if (regs[i].type == SCALAR_VALUE) + } else if (arg->arg_type == ARG_ANYTHING) { + reg->type = SCALAR_VALUE; mark_reg_unknown(env, regs, i); - else if (base_type(regs[i].type) == PTR_TO_MEM) { - const u32 mem_size = regs[i].mem_size; - + } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { + /* assume unspecial LOCAL dynptr type */ + __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen); + } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { + reg->type = PTR_TO_MEM; + if (arg->arg_type & PTR_MAYBE_NULL) + reg->type |= PTR_MAYBE_NULL; mark_reg_known_zero(env, regs, i); - regs[i].mem_size = mem_size; - regs[i].id = ++env->id_gen; + reg->mem_size = arg->mem_size; + reg->id = ++env->id_gen; + } else { + WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n", + i - BPF_REG_1, arg->arg_type); + ret = -EFAULT; + goto out; } } } else { + /* if main BPF program has associated BTF info, validate that + * it's matching expected signature, and otherwise mark BTF + * info for main program as unreliable + */ + if (env->prog->aux->func_info_aux) { + ret = btf_prepare_func_args(env, 0); + if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) + env->prog->aux->func_info_aux[0].unreliable = true; + } + /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); - ret = btf_check_subprog_arg_match(env, subprog, regs); - if (ret == -EFAULT) - /* unlikely verifier bug. abort. - * ret == 0 and ret < 0 are sadly acceptable for - * main() function due to backward compatibility. - * Like socket filter program may be written as: - * int bpf_prog(struct pt_regs *ctx) - * and never dereference that ctx in the program. - * 'struct pt_regs' is a type mismatch for socket - * filter that should be using 'struct __sk_buff'. - */ - goto out; } ret = do_check(env); @@ -20594,12 +20734,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); - - env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); - env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); - env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token); - env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token); - env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF); + is_priv = bpf_capable(); bpf_get_btf_vmlinux(); @@ -20631,6 +20766,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; + env->allow_ptr_leaks = bpf_allow_ptr_leaks(); + env->allow_uninit_stack = bpf_allow_uninit_stack(); + env->bypass_spec_v1 = bpf_bypass_spec_v1(); + env->bypass_spec_v4 = bpf_bypass_spec_v4(); + env->bpf_capable = bpf_capable(); + if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index efe87d501c8c..d4313b53837e 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -199,7 +199,7 @@ static __initdata char *suffix_tbl[] = { * It returns 0 on success and -EINVAL on failure. */ static int __init parse_crashkernel_suffix(char *cmdline, - unsigned long long *crash_size, + unsigned long long *crash_size, const char *suffix) { char *cur = cmdline; @@ -268,9 +268,9 @@ static int __init __parse_crashkernel(char *cmdline, unsigned long long *crash_base, const char *suffix) { - char *first_colon, *first_space; - char *ck_cmdline; - char *name = "crashkernel="; + char *first_colon, *first_space; + char *ck_cmdline; + char *name = "crashkernel="; BUG_ON(!crash_size || !crash_base); *crash_size = 0; @@ -440,7 +440,7 @@ retry: return; } - if ((crash_base > CRASH_ADDR_LOW_MAX) && + if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size && reserve_crashkernel_low(crash_low_size)) { memblock_phys_free(crash_base, crash_size); return; diff --git a/kernel/cred.c b/kernel/cred.c index 3c714cb31660..c033a201c808 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -43,10 +43,6 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; */ struct cred init_cred = { .usage = ATOMIC_INIT(4), -#ifdef CONFIG_DEBUG_CREDENTIALS - .subscribers = ATOMIC_INIT(2), - .magic = CRED_MAGIC, -#endif .uid = GLOBAL_ROOT_UID, .gid = GLOBAL_ROOT_GID, .suid = GLOBAL_ROOT_UID, @@ -66,31 +62,6 @@ struct cred init_cred = { .ucounts = &init_ucounts, }; -static inline void set_cred_subscribers(struct cred *cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_set(&cred->subscribers, n); -#endif -} - -static inline int read_cred_subscribers(const struct cred *cred) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - return atomic_read(&cred->subscribers); -#else - return 0; -#endif -} - -static inline void alter_cred_subscribers(const struct cred *_cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - struct cred *cred = (struct cred *) _cred; - - atomic_add(n, &cred->subscribers); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -100,20 +71,9 @@ static void put_cred_rcu(struct rcu_head *rcu) kdebug("put_cred_rcu(%p)", cred); -#ifdef CONFIG_DEBUG_CREDENTIALS - if (cred->magic != CRED_MAGIC_DEAD || - atomic_read(&cred->usage) != 0 || - read_cred_subscribers(cred) != 0) - panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %d, subscr %d\n", - cred, cred->magic, cred->put_addr, - atomic_read(&cred->usage), - read_cred_subscribers(cred)); -#else - if (atomic_read(&cred->usage) != 0) - panic("CRED: put_cred_rcu() sees %p with usage %d\n", - cred, atomic_read(&cred->usage)); -#endif + if (atomic_long_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %ld\n", + cred, atomic_long_read(&cred->usage)); security_cred_free(cred); key_put(cred->session_keyring); @@ -137,16 +97,10 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%d,%d})", cred, - atomic_read(&cred->usage), - read_cred_subscribers(cred)); - - BUG_ON(atomic_read(&cred->usage) != 0); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(cred) != 0); - cred->magic = CRED_MAGIC_DEAD; - cred->put_addr = __builtin_return_address(0); -#endif + kdebug("__put_cred(%p{%ld})", cred, + atomic_long_read(&cred->usage)); + + BUG_ON(atomic_long_read(&cred->usage) != 0); BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); @@ -164,9 +118,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *real_cred, *cred; - kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); + kdebug("exit_creds(%u,%p,%p,{%ld})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage)); real_cred = (struct cred *) tsk->real_cred; tsk->real_cred = NULL; @@ -174,15 +127,10 @@ void exit_creds(struct task_struct *tsk) cred = (struct cred *) tsk->cred; tsk->cred = NULL; - validate_creds(cred); if (real_cred == cred) { - alter_cred_subscribers(cred, -2); put_cred_many(cred, 2); } else { - validate_creds(real_cred); - alter_cred_subscribers(real_cred, -1); put_cred(real_cred); - alter_cred_subscribers(cred, -1); put_cred(cred); } @@ -230,10 +178,7 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; - atomic_set(&new->usage, 1); -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif + atomic_long_set(&new->usage, 1); if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -264,8 +209,6 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - validate_process_creds(); - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) return NULL; @@ -276,8 +219,7 @@ struct cred *prepare_creds(void) memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; - atomic_set(&new->usage, 1); - set_cred_subscribers(new, 0); + atomic_long_set(&new->usage, 1); get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -300,7 +242,6 @@ struct cred *prepare_creds(void) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; - validate_creds(new); return new; error: @@ -362,10 +303,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) clone_flags & CLONE_THREAD ) { p->real_cred = get_cred_many(p->cred, 2); - alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%d,%d})", - p->cred, atomic_read(&p->cred->usage), - read_cred_subscribers(p->cred)); + kdebug("share_creds(%p{%ld})", + p->cred, atomic_long_read(&p->cred->usage)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; } @@ -404,8 +343,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->cred = p->real_cred = get_cred(new); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(new, 2); - validate_creds(new); return 0; error_put: @@ -457,17 +394,11 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%d,%d})", new, - atomic_read(&new->usage), - read_cred_subscribers(new)); + kdebug("commit_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); BUG_ON(task->cred != old); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(old) < 2); - validate_creds(old); - validate_creds(new); -#endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -502,14 +433,12 @@ int commit_creds(struct cred *new) * RLIMIT_NPROC limits on user->processes have already been checked * in set_user(). */ - alter_cred_subscribers(new, 2); if (new->user != old->user || new->user_ns != old->user_ns) inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user || new->user_ns != old->user_ns) dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(old, -2); /* send notifications */ if (!uid_eq(new->uid, old->uid) || @@ -539,14 +468,10 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%d,%d})", new, - atomic_read(&new->usage), - read_cred_subscribers(new)); + kdebug("abort_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(new) != 0); -#endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } EXPORT_SYMBOL(abort_creds); @@ -562,12 +487,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%d,%d})", new, - atomic_read(&new->usage), - read_cred_subscribers(new)); - - validate_creds(old); - validate_creds(new); + kdebug("override_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); /* * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. @@ -576,18 +497,12 @@ const struct cred *override_creds(const struct cred *new) * we are only installing the cred into the thread-synchronous * '->cred' pointer, not the '->real_cred' pointer that is * visible to other threads under RCU. - * - * Also note that we did validate_creds() manually, not depending - * on the validation in 'get_cred()'. */ get_new_cred((struct cred *)new); - alter_cred_subscribers(new, 1); rcu_assign_pointer(current->cred, new); - alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%d,%d}", old, - atomic_read(&old->usage), - read_cred_subscribers(old)); + kdebug("override_creds() = %p{%ld}", old, + atomic_long_read(&old->usage)); return old; } EXPORT_SYMBOL(override_creds); @@ -603,15 +518,10 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%d,%d})", old, - atomic_read(&old->usage), - read_cred_subscribers(old)); + kdebug("revert_creds(%p{%ld})", old, + atomic_long_read(&old->usage)); - validate_creds(old); - validate_creds(override); - alter_cred_subscribers(old, 1); rcu_assign_pointer(current->cred, old); - alter_cred_subscribers(override, -1); put_cred(override); } EXPORT_SYMBOL(revert_creds); @@ -731,12 +641,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); - validate_creds(old); *new = *old; new->non_rcu = 0; - atomic_set(&new->usage, 1); - set_cred_subscribers(new, 0); + atomic_long_set(&new->usage, 1); get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); @@ -760,7 +668,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) goto error; put_cred(old); - validate_creds(new); return new; error: @@ -825,109 +732,3 @@ int set_create_files_as(struct cred *new, struct inode *inode) return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); - -#ifdef CONFIG_DEBUG_CREDENTIALS - -bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - return false; -} -EXPORT_SYMBOL(creds_are_invalid); - -/* - * dump invalid credentials - */ -static void dump_invalid_creds(const struct cred *cred, const char *label, - const struct task_struct *tsk) -{ - pr_err("%s credentials: %p %s%s%s\n", - label, cred, - cred == &init_cred ? "[init]" : "", - cred == tsk->real_cred ? "[real]" : "", - cred == tsk->cred ? "[eff]" : ""); - pr_err("->magic=%x, put_addr=%p\n", - cred->magic, cred->put_addr); - pr_err("->usage=%d, subscr=%d\n", - atomic_read(&cred->usage), - read_cred_subscribers(cred)); - pr_err("->*uid = { %d,%d,%d,%d }\n", - from_kuid_munged(&init_user_ns, cred->uid), - from_kuid_munged(&init_user_ns, cred->euid), - from_kuid_munged(&init_user_ns, cred->suid), - from_kuid_munged(&init_user_ns, cred->fsuid)); - pr_err("->*gid = { %d,%d,%d,%d }\n", - from_kgid_munged(&init_user_ns, cred->gid), - from_kgid_munged(&init_user_ns, cred->egid), - from_kgid_munged(&init_user_ns, cred->sgid), - from_kgid_munged(&init_user_ns, cred->fsgid)); -#ifdef CONFIG_SECURITY - pr_err("->security is %p\n", cred->security); - if ((unsigned long) cred->security >= PAGE_SIZE && - (((unsigned long) cred->security & 0xffffff00) != - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))) - pr_err("->security {%x, %x}\n", - ((u32*)cred->security)[0], - ((u32*)cred->security)[1]); -#endif -} - -/* - * report use of invalid credentials - */ -void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) -{ - pr_err("Invalid credentials\n"); - pr_err("At %s:%u\n", file, line); - dump_invalid_creds(cred, "Specified", current); - BUG(); -} -EXPORT_SYMBOL(__invalid_creds); - -/* - * check the credentials on a process - */ -void __validate_process_creds(struct task_struct *tsk, - const char *file, unsigned line) -{ - if (tsk->cred == tsk->real_cred) { - if (unlikely(read_cred_subscribers(tsk->cred) < 2 || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } else { - if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 || - read_cred_subscribers(tsk->cred) < 1 || - creds_are_invalid(tsk->real_cred) || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } - return; - -invalid_creds: - pr_err("Invalid process credentials\n"); - pr_err("At %s:%u\n", file, line); - - dump_invalid_creds(tsk->real_cred, "Real", tsk); - if (tsk->cred != tsk->real_cred) - dump_invalid_creds(tsk->cred, "Effective", tsk); - else - pr_err("Effective creds == Real creds\n"); - BUG(); -} -EXPORT_SYMBOL(__validate_process_creds); - -/* - * check creds for do_exit() - */ -void validate_creds_for_do_exit(struct task_struct *tsk) -{ - kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})", - tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); - - __validate_process_creds(tsk, __FILE__, __LINE__); -} - -#endif /* CONFIG_DEBUG_CREDENTIALS */ diff --git a/kernel/events/core.c b/kernel/events/core.c index c9d123e13b57..9efd0d7775e7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1947,6 +1947,16 @@ static bool perf_event_validate_size(struct perf_event *event) group_leader->nr_siblings + 1) > 16*1024) return false; + /* + * When creating a new group leader, group_leader->ctx is initialized + * after the size has been validated, but we cannot safely use + * for_each_sibling_event() until group_leader->ctx is set. A new group + * leader cannot have any siblings yet, so we can safely skip checking + * the non-existent siblings. + */ + if (event == group_leader) + return true; + for_each_sibling_event(sibling, group_leader) { if (__perf_event_read_size(sibling->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) diff --git a/kernel/exit.c b/kernel/exit.c index ee9f43bed49a..aedc0832c9f4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -824,8 +824,6 @@ void __noreturn do_exit(long code) ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); - validate_creds_for_do_exit(tsk); - io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -909,7 +907,6 @@ void __noreturn do_exit(long code) if (tsk->task_frag.page) put_page(tsk->task_frag.page); - validate_creds_for_do_exit(tsk); exit_task_stack_account(tsk); check_stack_usage(); diff --git a/kernel/resource.c b/kernel/resource.c index 866ef3663a0b..91be1bc50b60 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1844,8 +1844,8 @@ get_free_mem_region(struct device *dev, struct resource *base, write_lock(&resource_lock); for (addr = gfr_start(base, size, align, flags); - gfr_continue(base, addr, size, flags); - addr = gfr_next(addr, size, flags)) { + gfr_continue(base, addr, align, flags); + addr = gfr_next(addr, align, flags)) { if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE) != REGION_DISJOINT) continue; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e1a6e3c675c0..9a846439b36a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -201,6 +201,20 @@ COND_SYSCALL(recvmmsg_time32); COND_SYSCALL_COMPAT(recvmmsg_time32); COND_SYSCALL_COMPAT(recvmmsg_time64); +/* Posix timer syscalls may be configured out */ +COND_SYSCALL(timer_create); +COND_SYSCALL(timer_gettime); +COND_SYSCALL(timer_getoverrun); +COND_SYSCALL(timer_settime); +COND_SYSCALL(timer_delete); +COND_SYSCALL(clock_adjtime); +COND_SYSCALL(getitimer); +COND_SYSCALL(setitimer); +COND_SYSCALL(alarm); +COND_SYSCALL_COMPAT(timer_create); +COND_SYSCALL_COMPAT(getitimer); +COND_SYSCALL_COMPAT(setitimer); + /* * Architecture specific syscalls: see further below */ diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 828aeecbd1e8..9b6fcb8d85e7 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -17,40 +17,6 @@ #include <linux/time_namespace.h> #include <linux/compat.h> -#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER -/* Architectures may override SYS_NI and COMPAT_SYS_NI */ -#include <asm/syscall_wrapper.h> -#endif - -asmlinkage long sys_ni_posix_timers(void) -{ - pr_err_once("process %d (%s) attempted a POSIX timer syscall " - "while CONFIG_POSIX_TIMERS is not set\n", - current->pid, current->comm); - return -ENOSYS; -} - -#ifndef SYS_NI -#define SYS_NI(name) SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers) -#endif - -#ifndef COMPAT_SYS_NI -#define COMPAT_SYS_NI(name) SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers) -#endif - -SYS_NI(timer_create); -SYS_NI(timer_gettime); -SYS_NI(timer_getoverrun); -SYS_NI(timer_settime); -SYS_NI(timer_delete); -SYS_NI(clock_adjtime); -SYS_NI(getitimer); -SYS_NI(setitimer); -SYS_NI(clock_adjtime32); -#ifdef __ARCH_WANT_SYS_ALARM -SYS_NI(alarm); -#endif - /* * We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC * as it is easy to remain compatible with little code. CLOCK_BOOTTIME @@ -158,18 +124,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, which_clock); } -#ifdef CONFIG_COMPAT -COMPAT_SYS_NI(timer_create); -#endif - -#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA) -COMPAT_SYS_NI(getitimer); -COMPAT_SYS_NI(setitimer); -#endif - #ifdef CONFIG_COMPAT_32BIT_TIME -SYS_NI(timer_settime32); -SYS_NI(timer_gettime32); SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock, struct old_timespec32 __user *, tp) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 492d60e9c480..7ac6c52b25eb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8d2a4f00eca9..83eab547f1d1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) *cnt = rb_time_cnt(top); - /* If top and msb counts don't match, this interrupted a write */ - if (*cnt != rb_time_cnt(msb)) + /* If top, msb or bottom counts don't match, this interrupted a write */ + if (*cnt != rb_time_cnt(msb) || *cnt != rb_time_cnt(bottom)) return false; /* The shift to msb will lose its cnt bits */ @@ -700,44 +700,6 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) return local_try_cmpxchg(l, &expect, set); } -static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) -{ - unsigned long cnt, top, bottom, msb; - unsigned long cnt2, top2, bottom2, msb2; - u64 val; - - /* The cmpxchg always fails if it interrupted an update */ - if (!__rb_time_read(t, &val, &cnt2)) - return false; - - if (val != expect) - return false; - - cnt = local_read(&t->cnt); - if ((cnt & 3) != cnt2) - return false; - - cnt2 = cnt + 1; - - rb_time_split(val, &top, &bottom, &msb); - top = rb_time_val_cnt(top, cnt); - bottom = rb_time_val_cnt(bottom, cnt); - - rb_time_split(set, &top2, &bottom2, &msb2); - top2 = rb_time_val_cnt(top2, cnt2); - bottom2 = rb_time_val_cnt(bottom2, cnt2); - - if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2)) - return false; - if (!rb_time_read_cmpxchg(&t->msb, msb, msb2)) - return false; - if (!rb_time_read_cmpxchg(&t->top, top, top2)) - return false; - if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2)) - return false; - return true; -} - #else /* 64 bits */ /* local64_t always succeeds */ @@ -751,11 +713,6 @@ static void rb_time_set(rb_time_t *t, u64 val) { local64_set(&t->time, val); } - -static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) -{ - return local64_try_cmpxchg(&t->time, &expect, set); -} #endif /* @@ -1787,6 +1744,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(bpage); } + free_page((unsigned long)cpu_buffer->free_page); + kfree(cpu_buffer); } @@ -2407,7 +2366,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ barrier(); - if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE) + if ((iter->head + length) > commit || length > BUF_PAGE_SIZE) /* Writer corrupted the read? */ goto reset; @@ -2981,25 +2940,6 @@ static unsigned rb_calculate_event_length(unsigned length) return length; } -static u64 rb_time_delta(struct ring_buffer_event *event) -{ - switch (event->type_len) { - case RINGBUF_TYPE_PADDING: - return 0; - - case RINGBUF_TYPE_TIME_EXTEND: - return rb_event_time_stamp(event); - - case RINGBUF_TYPE_TIME_STAMP: - return 0; - - case RINGBUF_TYPE_DATA: - return event->time_delta; - default: - return 0; - } -} - static inline bool rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) @@ -3007,8 +2947,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, unsigned long new_index, old_index; struct buffer_page *bpage; unsigned long addr; - u64 write_stamp; - u64 delta; new_index = rb_event_index(event); old_index = new_index + rb_event_ts_length(event); @@ -3017,14 +2955,10 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, bpage = READ_ONCE(cpu_buffer->tail_page); - delta = rb_time_delta(event); - - if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp)) - return false; - - /* Make sure the write stamp is read before testing the location */ - barrier(); - + /* + * Make sure the tail_page is still the same and + * the next write location is the end of this event + */ if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = local_read(&bpage->write) & ~RB_WRITE_MASK; @@ -3035,20 +2969,20 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, * to make sure that the next event adds an absolute * value and does not rely on the saved write stamp, which * is now going to be bogus. + * + * By setting the before_stamp to zero, the next event + * is not going to use the write_stamp and will instead + * create an absolute timestamp. This means there's no + * reason to update the wirte_stamp! */ rb_time_set(&cpu_buffer->before_stamp, 0); - /* Something came in, can't discard */ - if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, - write_stamp, write_stamp - delta)) - return false; - /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume * that this event just added itself before updating * the write stamp. The interrupting event will fix the - * write stamp for us, and use the before stamp as its delta. + * write stamp for us, and use an absolute timestamp. */ /* @@ -3485,7 +3419,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, return; /* - * If this interrupted another event, + * If this interrupted another event, */ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1) goto out; @@ -3579,7 +3513,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * absolute timestamp. * Don't bother if this is the start of a new page (w == 0). */ - if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) { + if (!w) { + /* Use the sub-buffer timestamp */ + info->delta = 0; + } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) { info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; info->length += RB_LEN_TIME_EXTEND; } else { @@ -3602,26 +3539,19 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) { - /* before and after may now different, fix it up*/ - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - if (a_ok && b_ok && info->before != info->after) - (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, - info->before, info->after); - if (a_ok && b_ok) - check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); + check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); return rb_move_tail(cpu_buffer, tail, info); } if (likely(tail == w)) { - u64 save_before; - bool s_ok; - /* Nothing interrupted us between A and C */ /*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts); - barrier(); - /*E*/ s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before); - RB_WARN_ON(cpu_buffer, !s_ok); + /* + * If something came in between C and D, the write stamp + * may now not be in sync. But that's fine as the before_stamp + * will be different and then next event will just be forced + * to use an absolute timestamp. + */ if (likely(!(info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) /* This did not interrupt any time update */ @@ -3629,41 +3559,40 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, else /* Just use full timestamp for interrupting event */ info->delta = info->ts; - barrier(); check_buffer(cpu_buffer, info, tail); - if (unlikely(info->ts != save_before)) { - /* SLOW PATH - Interrupted between C and E */ - - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - RB_WARN_ON(cpu_buffer, !a_ok); - - /* Write stamp must only go forward */ - if (save_before > info->after) { - /* - * We do not care about the result, only that - * it gets updated atomically. - */ - (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, save_before); - } - } } else { u64 ts; /* SLOW PATH - Interrupted between A and C */ - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - /* Was interrupted before here, write_stamp must be valid */ + + /* Save the old before_stamp */ + a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); RB_WARN_ON(cpu_buffer, !a_ok); + + /* + * Read a new timestamp and update the before_stamp to make + * the next event after this one force using an absolute + * timestamp. This is in case an interrupt were to come in + * between E and F. + */ ts = rb_time_stamp(cpu_buffer->buffer); + rb_time_set(&cpu_buffer->before_stamp, ts); + barrier(); - /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && - info->after < ts && - rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, ts)) { - /* Nothing came after this event between C and E */ + /*E*/ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); + /* Was interrupted before here, write_stamp must be valid */ + RB_WARN_ON(cpu_buffer, !a_ok); + barrier(); + /*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && + info->after == info->before && info->after < ts) { + /* + * Nothing came after this event between C and F, it is + * safe to use info->after for the delta as it + * matched info->before and is still valid. + */ info->delta = ts - info->after; } else { /* - * Interrupted between C and E: + * Interrupted between C and F: * Lost the previous events time stamp. Just set the * delta to zero, and this will be the same time as * the event this event interrupted. And the events that @@ -3714,6 +3643,12 @@ rb_reserve_next_event(struct trace_buffer *buffer, int nr_loops = 0; int add_ts_default; + /* ring buffer does cmpxchg, make sure it is safe in NMI context */ + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && + (unlikely(in_nmi()))) { + return NULL; + } + rb_start_commit(cpu_buffer); /* The commit page can not change after this */ @@ -3737,6 +3672,8 @@ rb_reserve_next_event(struct trace_buffer *buffer, if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { add_ts_default = RB_ADD_STAMP_ABSOLUTE; info.length += RB_LEN_TIME_EXTEND; + if (info.length > BUF_MAX_DATA_SIZE) + goto out_fail; } else { add_ts_default = RB_ADD_STAMP_NONE; } @@ -5118,7 +5055,8 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) if (!iter) return NULL; - iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags); + /* Holds the entire event: data and meta data */ + iter->event = kmalloc(BUF_PAGE_SIZE, flags); if (!iter->event) { kfree(iter); return NULL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fbcd3bafb93e..199df497db07 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4722,7 +4722,11 @@ static int s_show(struct seq_file *m, void *v) iter->leftover = ret; } else { - print_trace_line(iter); + ret = print_trace_line(iter); + if (ret == TRACE_TYPE_PARTIAL_LINE) { + iter->seq.full = 0; + trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); + } ret = trace_print_seq(m, &iter->seq); /* * If we overflow the seq_file buffer, then it will @@ -4964,6 +4968,12 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp) return 0; } +int tracing_single_release_file_tr(struct inode *inode, struct file *filp) +{ + tracing_release_file_tr(inode, filp); + return single_release(inode, filp); +} + static int tracing_mark_open(struct inode *inode, struct file *filp) { stream_open(inode, filp); @@ -6344,7 +6354,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (!tr->array_buffer.buffer) return 0; - /* Do not allow tracing while resizng ring buffer */ + /* Do not allow tracing while resizing ring buffer */ tracing_stop_tr(tr); ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); @@ -6352,7 +6362,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE - if (!tr->current_trace->use_max_tr) + if (!tr->allocated_snapshot) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b7f4ea25a194..0489e72c8169 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -617,6 +617,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); int tracing_open_file_tr(struct inode *inode, struct file *filp); int tracing_release_file_tr(struct inode *inode, struct file *filp); +int tracing_single_release_file_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1abc07fba1b9..5ecf3c8bde20 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5623,10 +5623,12 @@ static int event_hist_open(struct inode *inode, struct file *file) { int ret; - ret = security_locked_down(LOCKDOWN_TRACEFS); + ret = tracing_open_file_tr(inode, file); if (ret) return ret; + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; return single_open(file, hist_show, file); } @@ -5634,7 +5636,7 @@ const struct file_operations event_hist_fops = { .open = event_hist_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_file_tr, }; #ifdef CONFIG_HIST_TRIGGERS_DEBUG @@ -5900,10 +5902,12 @@ static int event_hist_debug_open(struct inode *inode, struct file *file) { int ret; - ret = security_locked_down(LOCKDOWN_TRACEFS); + ret = tracing_open_file_tr(inode, file); if (ret) return ret; + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; return single_open(file, hist_debug_show, file); } @@ -5911,7 +5915,7 @@ const struct file_operations event_hist_debug_fops = { .open = event_hist_debug_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = tracing_single_release_file_tr, }; #endif diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d8b302d01083..3e7fa44dc2b2 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1587,11 +1587,12 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, { struct print_entry *field; struct trace_seq *s = &iter->seq; + int max = iter->ent_size - offsetof(struct print_entry, buf); trace_assign_type(field, iter->ent); seq_print_ip_sym(s, field->ip, flags); - trace_seq_printf(s, ": %s", field->buf); + trace_seq_printf(s, ": %.*s", max, field->buf); return trace_handle_return(s); } @@ -1600,10 +1601,11 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, struct trace_event *event) { struct print_entry *field; + int max = iter->ent_size - offsetof(struct print_entry, buf); trace_assign_type(field, iter->ent); - trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf); + trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf); return trace_handle_return(&iter->seq); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cc7d53d9dc01..4405f81248fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1739,21 +1739,6 @@ config DEBUG_MAPLE_TREE endmenu -config DEBUG_CREDENTIALS - bool "Debug credential management" - depends on DEBUG_KERNEL - help - Enable this to turn on some debug checking for credential - management. The additional code keeps track of the number of - pointers from task_structs to any given cred struct, and checks to - see that this number never exceeds the usage count of the cred - struct. - - Furthermore, if SELinux is enabled, this also checks that the - security pointer in the cred struct is never seen to be invalid. - - If unsure, say N. - source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU diff --git a/mm/damon/core.c b/mm/damon/core.c index ce1562783e7e..3a05e71509b9 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -445,6 +445,8 @@ struct damon_ctx *damon_new_ctx(void) if (!ctx) return NULL; + init_completion(&ctx->kdamond_started); + ctx->attrs.sample_interval = 5 * 1000; ctx->attrs.aggr_interval = 100 * 1000; ctx->attrs.ops_update_interval = 60 * 1000 * 1000; @@ -668,11 +670,14 @@ static int __damon_start(struct damon_ctx *ctx) mutex_lock(&ctx->kdamond_lock); if (!ctx->kdamond) { err = 0; + reinit_completion(&ctx->kdamond_started); ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", nr_running_ctxs); if (IS_ERR(ctx->kdamond)) { err = PTR_ERR(ctx->kdamond); ctx->kdamond = NULL; + } else { + wait_for_completion(&ctx->kdamond_started); } } mutex_unlock(&ctx->kdamond_lock); @@ -1433,6 +1438,7 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + complete(&ctx->kdamond_started); kdamond_init_intervals_sis(ctx); if (ctx->ops.init) diff --git a/mm/shmem.c b/mm/shmem.c index 91e2620148b2..0d1ce70bce38 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1080,7 +1080,24 @@ whole_folios: } VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); - truncate_inode_folio(mapping, folio); + + if (!folio_test_large(folio)) { + truncate_inode_folio(mapping, folio); + } else if (truncate_inode_partial_folio(folio, lstart, lend)) { + /* + * If we split a page, reset the loop so + * that we pick up the new sub pages. + * Otherwise the THP was entirely + * dropped or the target range was + * zeroed, so just continue the loop as + * is. + */ + if (!folio_test_large(folio)) { + folio_unlock(folio); + index = start; + break; + } + } } folio_unlock(folio); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 506f8220c5fe..9dd8977de5a2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4089,6 +4089,9 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) else VM_WARN_ON_ONCE(true); + WRITE_ONCE(lruvec->lrugen.seg, seg); + WRITE_ONCE(lruvec->lrugen.gen, new); + hlist_nulls_del_rcu(&lruvec->lrugen.list); if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) @@ -4099,9 +4102,6 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) pgdat->memcg_lru.nr_memcgs[old]--; pgdat->memcg_lru.nr_memcgs[new]++; - lruvec->lrugen.gen = new; - WRITE_ONCE(lruvec->lrugen.seg, seg); - if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); @@ -4124,11 +4124,11 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg) gen = get_memcg_gen(pgdat->memcg_lru.seq); + lruvec->lrugen.gen = gen; + hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); pgdat->memcg_lru.nr_memcgs[gen]++; - lruvec->lrugen.gen = gen; - spin_unlock_irq(&pgdat->memcg_lru.lock); } } @@ -4232,7 +4232,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* protected */ - if (tier > tier_idx) { + if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) { int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); @@ -4598,7 +4598,12 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, } /* try to scrape all its memory if this memcg was deleted */ - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; + if (!mem_cgroup_online(memcg)) { + *nr_to_scan = total; + return false; + } + + *nr_to_scan = total >> sc->priority; /* * The aging tries to be lazy to reduce the overhead, while the eviction @@ -4635,7 +4640,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool DEFINE_MAX_SEQ(lruvec); if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) - return 0; + return -1; if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) return nr_to_scan; @@ -4648,20 +4653,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0; } -static unsigned long get_nr_to_reclaim(struct scan_control *sc) +static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc) { + int i; + enum zone_watermarks mark; + /* don't abort memcg reclaim to ensure fairness */ if (!root_reclaim(sc)) - return -1; + return false; + + if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order))) + return true; + + /* check the order to exclude compaction-induced reclaim */ + if (!current_is_kswapd() || sc->order) + return false; + + mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ? + WMARK_PROMO : WMARK_HIGH; + + for (i = 0; i <= sc->reclaim_idx; i++) { + struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i; + unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH; + + if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0)) + return false; + } - return max(sc->nr_to_reclaim, compact_gap(sc->order)); + /* kswapd should abort if all eligible zones are safe */ + return true; } static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { long nr_to_scan; unsigned long scanned = 0; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); int swappiness = get_swappiness(lruvec, sc); /* clean file folios are more likely to exist */ @@ -4683,13 +4709,13 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) if (scanned >= nr_to_scan) break; - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; cond_resched(); } - /* whether try_to_inc_max_seq() was successful */ + /* whether this lruvec should be rotated */ return nr_to_scan < 0; } @@ -4698,14 +4724,9 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) bool success; unsigned long scanned = sc->nr_scanned; unsigned long reclaimed = sc->nr_reclaimed; - int seg = lru_gen_memcg_seg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - /* see the comment on MEMCG_NR_GENS */ - if (!lruvec_is_sizable(lruvec, sc)) - return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; - mem_cgroup_calculate_protection(NULL, memcg); if (mem_cgroup_below_min(NULL, memcg)) @@ -4713,7 +4734,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) if (mem_cgroup_below_low(NULL, memcg)) { /* see the comment on MEMCG_NR_GENS */ - if (seg != MEMCG_LRU_TAIL) + if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL) return MEMCG_LRU_TAIL; memcg_memory_event(memcg, MEMCG_LOW); @@ -4729,7 +4750,15 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) flush_reclaim_state(sc); - return success ? MEMCG_LRU_YOUNG : 0; + if (success && mem_cgroup_online(memcg)) + return MEMCG_LRU_YOUNG; + + if (!success && lruvec_is_sizable(lruvec, sc)) + return 0; + + /* one retry if offlined or too small */ + return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ? + MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; } #ifdef CONFIG_MEMCG @@ -4743,14 +4772,13 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) struct lruvec *lruvec; struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; - const struct hlist_nulls_node *pos; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); + struct hlist_nulls_node *pos; + gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: op = 0; memcg = NULL; - gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); rcu_read_lock(); @@ -4761,6 +4789,10 @@ restart: } mem_cgroup_put(memcg); + memcg = NULL; + + if (gen != READ_ONCE(lrugen->gen)) + continue; lruvec = container_of(lrugen, struct lruvec, lrugen); memcg = lruvec_memcg(lruvec); @@ -4777,7 +4809,7 @@ restart: rcu_read_lock(); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; } @@ -4788,7 +4820,7 @@ restart: mem_cgroup_put(memcg); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (!is_a_nulls(pos)) return; /* restart if raced with lru_gen_rotate_memcg() */ @@ -4845,16 +4877,14 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) return; /* - * Determine the initial priority based on ((total / MEMCG_NR_GENS) >> - * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the - * estimated reclaimed_to_scanned_ratio = inactive / total. + * Determine the initial priority based on + * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, + * where reclaimed_to_scanned_ratio = inactive / total. */ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_swappiness(lruvec, sc)) reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); - reclaimable /= MEMCG_NR_GENS; - /* round down reclaimable and round up sc->nr_to_reclaim */ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1); diff --git a/mm/workingset.c b/mm/workingset.c index b192e44a0e7c..33baad203277 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -313,10 +313,10 @@ static void lru_gen_refault(struct folio *folio, void *shadow) * 1. For pages accessed through page tables, hotter pages pushed out * hot pages which refaulted immediately. * 2. For pages accessed multiple times through file descriptors, - * numbers of accesses might have been out of the range. + * they would have been protected by sort_folio(). */ - if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) { - folio_set_workingset(folio); + if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) { + set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset)); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); } unlock: diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 0beb44f2fe1f..f00158234505 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -407,6 +407,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; @@ -417,6 +419,8 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } @@ -436,8 +440,11 @@ void vlan_vids_del_by_dev(struct net_device *dev, if (!vlan_info) return; - list_for_each_entry(vid_info, &vlan_info->vid_list, list) + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); + } } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a852ec093fa8..198f5ba2feae 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1581,7 +1581,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } /* Build a packet */ - SOCK_DEBUG(sk, "SK %p: Got address.\n", sk); + net_dbg_ratelimited("SK %p: Got address.\n", sk); /* For headers */ size = sizeof(struct ddpehdr) + len + ddp_dl->header_length; @@ -1602,7 +1602,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dev = rt->dev; - SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", + net_dbg_ratelimited("SK %p: Size needed %d, device %s\n", sk, size, dev->name); hard_header_len = dev->hard_header_len; @@ -1631,7 +1631,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) skb_reserve(skb, hard_header_len); skb->dev = dev; - SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); + net_dbg_ratelimited("SK %p: Begin build.\n", sk); ddp = skb_put(skb, sizeof(struct ddpehdr)); ddp->deh_len_hops = htons(len + sizeof(*ddp)); @@ -1642,7 +1642,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ddp->deh_dport = usat->sat_port; ddp->deh_sport = at->src_port; - SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len); + net_dbg_ratelimited("SK %p: Copy user data (%zd bytes).\n", sk, len); err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) { @@ -1666,7 +1666,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (skb2) { loopback = 1; - SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk); + net_dbg_ratelimited("SK %p: send out(copy).\n", sk); /* * If it fails it is queued/sent above in the aarp queue */ @@ -1675,7 +1675,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } if (dev->flags & IFF_LOOPBACK || loopback) { - SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk); + net_dbg_ratelimited("SK %p: Loop back.\n", sk); /* loop back */ skb_orphan(skb); if (ddp->deh_dnode == ATADDR_BCAST) { @@ -1689,7 +1689,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } ddp_dl->request(ddp_dl, skb, dev->dev_addr); } else { - SOCK_DEBUG(sk, "SK %p: send out.\n", sk); + net_dbg_ratelimited("SK %p: send out.\n", sk); if (rt->flags & RTF_GATEWAY) { gsat.sat_addr = rt->gateway; usat = &gsat; @@ -1700,7 +1700,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) */ aarp_send_ddp(dev, skb, &usat->sat_addr, NULL); } - SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len); + net_dbg_ratelimited("SK %p: Done write (%zd).\n", sk, len); out: release_sock(sk); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 336a76165454..b93464ac3517 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -309,11 +309,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) - return 0; + err = 0; + release_sock(sk); return err; } @@ -343,6 +346,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2cee330188ce..a41d2693f4d8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -300,6 +300,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec) __u8 vnd_len, *vnd_data = NULL; struct hci_op_configure_data_path *cmd = NULL; + if (!codec->data_path || !hdev->get_codec_config_data) + return 0; + + /* Do not take me as error */ + if (!hdev->get_codec_config_data) + return 0; + err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, &vnd_data); if (err < 0) @@ -345,9 +352,7 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data) bt_dev_dbg(hdev, "hcon %p", conn); - /* for offload use case, codec needs to configured before opening SCO */ - if (conn->codec.data_path) - configure_datapath_sync(hdev, &conn->codec); + configure_datapath_sync(hdev, &conn->codec); conn->state = BT_CONNECT; conn->out = true; @@ -1086,8 +1091,9 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) hci_conn_failed(conn, reason); break; case ISO_LINK: - if (conn->state != BT_CONNECTED && - !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) + if ((conn->state != BT_CONNECTED && + !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) || + test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) hci_conn_failed(conn, reason); break; } @@ -2228,7 +2234,17 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 base_len, __u8 *base) { struct hci_conn *conn; + struct hci_conn *parent; __u8 eir[HCI_MAX_PER_AD_LENGTH]; + struct hci_link *link; + + /* Look for any BIS that is open for rebinding */ + conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN); + if (conn) { + memcpy(qos, &conn->iso_qos, sizeof(*qos)); + conn->state = BT_CONNECTED; + return conn; + } if (base_len && base) base_len = eir_append_service_data(eir, 0, 0x1851, @@ -2256,6 +2272,20 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, conn->iso_qos = *qos; conn->state = BT_BOUND; + /* Link BISes together */ + parent = hci_conn_hash_lookup_big(hdev, + conn->iso_qos.bcast.big); + if (parent && parent != conn) { + link = hci_conn_link(parent, conn); + if (!link) { + hci_conn_drop(conn); + return ERR_PTR(-ENOLINK); + } + + /* Link takes the refcount */ + hci_conn_drop(conn); + } + return conn; } @@ -2287,6 +2317,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, if (IS_ERR(conn)) return conn; + if (conn->state == BT_CONNECTED) + return conn; + data.big = qos->bcast.big; data.bis = qos->bcast.bis; @@ -2421,12 +2454,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); - /* If we're already encrypted set the REAUTH_PEND flag, - * otherwise set the ENCRYPT_PEND. + /* Set the ENCRYPT_PEND to trigger encryption after + * authentication. */ - if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) - set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); - else + if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 6b7741f6e95b..233453807b50 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -1046,10 +1046,12 @@ static int min_key_size_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE) + hci_dev_lock(hdev); + if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_min_key_size = val; hci_dev_unlock(hdev); @@ -1074,10 +1076,12 @@ static int max_key_size_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size) + hci_dev_lock(hdev); + if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_max_key_size = val; hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0849e0dafa95..ef8c3bed7361 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -516,6 +516,9 @@ static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data, { struct hci_rp_read_class_of_dev *rp = data; + if (WARN_ON(!hdev)) + return HCI_ERROR_UNSPECIFIED; + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); if (rp->status) @@ -747,9 +750,23 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, } else { conn->enc_key_size = rp->key_size; status = 0; + + if (conn->enc_key_size < hdev->min_enc_key_size) { + /* As slave role, the conn->state has been set to + * BT_CONNECTED and l2cap conn req might not be received + * yet, at this moment the l2cap layer almost does + * nothing with the non-zero status. + * So we also clear encrypt related bits, and then the + * handler of l2cap conn req will get the right secure + * state at a later time. + */ + status = HCI_ERROR_AUTH_FAILURE; + clear_bit(HCI_CONN_ENCRYPT, &conn->flags); + clear_bit(HCI_CONN_AES_CCM, &conn->flags); + } } - hci_encrypt_cfm(conn, 0); + hci_encrypt_cfm(conn, status); done: hci_dev_unlock(hdev); @@ -820,8 +837,6 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, if (!rp->status) conn->auth_payload_timeout = get_unaligned_le16(sent + 2); - hci_encrypt_cfm(conn, 0); - unlock: hci_dev_unlock(hdev); @@ -2304,7 +2319,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - set_bit(HCI_INQUIRY, &hdev->flags); + if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) + set_bit(HCI_INQUIRY, &hdev->flags); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) @@ -3484,14 +3500,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, if (!ev->status) { clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); - - if (!hci_conn_ssp_enabled(conn) && - test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { - bt_dev_info(hdev, "re-auth of legacy device is not possible."); - } else { - set_bit(HCI_CONN_AUTH, &conn->flags); - conn->sec_level = conn->pending_sec_level; - } + set_bit(HCI_CONN_AUTH, &conn->flags); + conn->sec_level = conn->pending_sec_level; } else { if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING) set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); @@ -3500,7 +3510,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, } clear_bit(HCI_CONN_AUTH_PEND, &conn->flags); - clear_bit(HCI_CONN_REAUTH_PEND, &conn->flags); if (conn->state == BT_CONFIG) { if (!ev->status && hci_conn_ssp_enabled(conn)) { @@ -3683,12 +3692,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, cp.handle = cpu_to_le16(conn->handle); cp.timeout = cpu_to_le16(hdev->auth_payload_timeout); if (hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO, - sizeof(cp), &cp)) { + sizeof(cp), &cp)) bt_dev_err(hdev, "write auth payload timeout failed"); - goto notify; - } - - goto unlock; } notify: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d85a7091a116..a6fc8a2a5c67 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -348,8 +348,6 @@ static void le_scan_disable(struct work_struct *work) if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) goto _return; - cancel_delayed_work(&hdev->le_scan_restart); - status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL); if (status) { bt_dev_err(hdev, "failed to disable LE scan: %d", status); @@ -397,71 +395,6 @@ _return: static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup); -static int hci_le_scan_restart_sync(struct hci_dev *hdev) -{ - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return 0; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return 0; - } - - hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00); - return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE, - LE_SCAN_FILTER_DUP_ENABLE); -} - -static void le_scan_restart(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - unsigned long timeout, duration, scan_start, now; - int status; - - bt_dev_dbg(hdev, ""); - - status = hci_le_scan_restart_sync(hdev); - if (status) { - bt_dev_err(hdev, "failed to restart LE scan: status %d", - status); - return; - } - - hci_dev_lock(hdev); - - if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || - !hdev->discovery.scan_start) - goto unlock; - - /* When the scan was started, hdev->le_scan_disable has been queued - * after duration from scan_start. During scan restart this job - * has been canceled, and we need to queue it again after proper - * timeout, to make sure that scan does not run indefinitely. - */ - duration = hdev->discovery.scan_duration; - scan_start = hdev->discovery.scan_start; - now = jiffies; - if (now - scan_start <= duration) { - int elapsed; - - if (now >= scan_start) - elapsed = now - scan_start; - else - elapsed = ULONG_MAX - scan_start + now; - - timeout = duration - elapsed; - } else { - timeout = 0; - } - - queue_delayed_work(hdev->req_workqueue, - &hdev->le_scan_disable, timeout); - -unlock: - hci_dev_unlock(hdev); -} static int reenable_adv_sync(struct hci_dev *hdev, void *data) { @@ -630,7 +563,6 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_WORK(&hdev->reenable_adv_work, reenable_adv); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); - INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } @@ -3800,12 +3732,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ - /* Don't set Disconnect Complete when suspended as that - * would wakeup the host when disconnecting due to - * suspend. + /* Don't set Disconnect Complete and mode change when + * suspended as that would wakeup the host when disconnecting + * due to suspend. */ - if (hdev->suspended) + if (hdev->suspended) { events[0] &= 0xef; + events[2] &= 0xf7; + } } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); @@ -4960,7 +4894,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); cancel_delayed_work(&hdev->ncmd_timer); cancel_delayed_work(&hdev->le_scan_disable); - cancel_delayed_work(&hdev->le_scan_restart); hci_request_cancel_all(hdev); @@ -5178,7 +5111,6 @@ int hci_stop_discovery_sync(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); - cancel_delayed_work(&hdev->le_scan_restart); err = hci_scan_disable_sync(hdev); if (err) @@ -5686,19 +5618,18 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) if (err < 0) own_addr_type = ADDR_LE_DEV_PUBLIC; - if (hci_is_adv_monitoring(hdev)) { + if (hci_is_adv_monitoring(hdev) || + (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + hdev->discovery.result_filtering)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one * advertisement for one peer(*) during active scanning, and * might report loss to these peers. * - * Note that different controllers have different meanings of - * |duplicate|. Some of them consider packets with the same - * address as duplicate, and others consider packets with the - * same address and the same RSSI as duplicate. Although in the - * latter case we don't need to disable duplicate filter, but - * it is common to have active scanning for a short period of - * time, the power impact should be neglectable. + * If controller does strict duplicate filtering and the + * discovery requires result filtering disables controller based + * filtering since that can cause reports that would match the + * host filter to not be reported. */ filter_dup = LE_SCAN_FILTER_DUP_DISABLE; } @@ -5778,17 +5709,6 @@ int hci_start_discovery_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout)); - /* When service discovery is used and the controller has a - * strict duplicate filter, it is important to remember the - * start and duration of the scan. This is required for - * restarting scanning during the discovery phase. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && - hdev->discovery.result_filtering) { - hdev->discovery.scan_start = jiffies; - hdev->discovery.scan_duration = timeout; - } - queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); return 0; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 07b80e97aead..04f6572d35f1 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -54,6 +54,7 @@ static void iso_sock_kill(struct sock *sk); enum { BT_SK_BIG_SYNC, BT_SK_PA_SYNC, + BT_SK_PA_SYNC_TERM, }; struct iso_pinfo { @@ -82,6 +83,11 @@ static bool iso_match_sid(struct sock *sk, void *data); static bool iso_match_sync_handle(struct sock *sk, void *data); static void iso_sock_disconn(struct sock *sk); +typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); + +static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, + iso_sock_match_t match, void *data); + /* ---- ISO timers ---- */ #define ISO_CONN_TIMEOUT (HZ * 40) #define ISO_DISCONN_TIMEOUT (HZ * 2) @@ -190,10 +196,21 @@ static void iso_chan_del(struct sock *sk, int err) sock_set_flag(sk, SOCK_ZAPPED); } +static bool iso_match_conn_sync_handle(struct sock *sk, void *data) +{ + struct hci_conn *hcon = data; + + if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) + return false; + + return hcon->sync_handle == iso_pi(sk)->sync_handle; +} + static void iso_conn_del(struct hci_conn *hcon, int err) { struct iso_conn *conn = hcon->iso_data; struct sock *sk; + struct sock *parent; if (!conn) return; @@ -209,6 +226,25 @@ static void iso_conn_del(struct hci_conn *hcon, int err) if (sk) { lock_sock(sk); + + /* While a PA sync hcon is in the process of closing, + * mark parent socket with a flag, so that any residual + * BIGInfo adv reports that arrive before PA sync is + * terminated are not processed anymore. + */ + if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + parent = iso_get_sock_listen(&hcon->src, + &hcon->dst, + iso_match_conn_sync_handle, + hcon); + + if (parent) { + set_bit(BT_SK_PA_SYNC_TERM, + &iso_pi(parent)->flags); + sock_put(parent); + } + } + iso_sock_clear_timer(sk); iso_chan_del(sk, err); release_sock(sk); @@ -545,8 +581,6 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc, return NULL; } -typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); - /* Find socket listening: * source bdaddr (Unicast) * destination bdaddr (Broadcast only) @@ -574,19 +608,68 @@ static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, continue; /* Exact match. */ - if (!bacmp(&iso_pi(sk)->src, src)) + if (!bacmp(&iso_pi(sk)->src, src)) { + sock_hold(sk); break; + } /* Closest match */ - if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) + if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) { + if (sk1) + sock_put(sk1); + sk1 = sk; + sock_hold(sk1); + } } + if (sk && sk1) + sock_put(sk1); + read_unlock(&iso_sk_list.lock); return sk ? sk : sk1; } +static struct sock *iso_get_sock_big(struct sock *match_sk, bdaddr_t *src, + bdaddr_t *dst, uint8_t big) +{ + struct sock *sk = NULL; + + read_lock(&iso_sk_list.lock); + + sk_for_each(sk, &iso_sk_list.head) { + if (match_sk == sk) + continue; + + /* Look for sockets that have already been + * connected to the BIG + */ + if (sk->sk_state != BT_CONNECTED && + sk->sk_state != BT_CONNECT) + continue; + + /* Match Broadcast destination */ + if (bacmp(&iso_pi(sk)->dst, dst)) + continue; + + /* Match BIG handle */ + if (iso_pi(sk)->qos.bcast.big != big) + continue; + + /* Match source address */ + if (bacmp(&iso_pi(sk)->src, src)) + continue; + + sock_hold(sk); + break; + } + + read_unlock(&iso_sk_list.lock); + + return sk; +} + static void iso_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); @@ -639,6 +722,28 @@ static void iso_sock_kill(struct sock *sk) static void iso_sock_disconn(struct sock *sk) { + struct sock *bis_sk; + struct hci_conn *hcon = iso_pi(sk)->conn->hcon; + + if (test_bit(HCI_CONN_BIG_CREATED, &hcon->flags)) { + bis_sk = iso_get_sock_big(sk, &iso_pi(sk)->src, + &iso_pi(sk)->dst, + iso_pi(sk)->qos.bcast.big); + + /* If there are any other connected sockets for the + * same BIG, just delete the sk and leave the bis + * hcon active, in case later rebinding is needed. + */ + if (bis_sk) { + hcon->state = BT_OPEN; + iso_pi(sk)->conn->hcon = NULL; + iso_sock_clear_timer(sk); + iso_chan_del(sk, bt_to_errno(hcon->abort_reason)); + sock_put(bis_sk); + return; + } + } + sk->sk_state = BT_DISCONN; iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT); iso_conn_lock(iso_pi(sk)->conn); @@ -792,27 +897,75 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid, sa->iso_bc->bc_num_bis); - if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc)) + if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc)) return -EINVAL; bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr); + + /* Check if the address type is of LE type */ + if (!bdaddr_type_is_le(sa->iso_bc->bc_bdaddr_type)) + return -EINVAL; + iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; iso_pi(sk)->sync_handle = -1; + + if (sa->iso_bc->bc_sid > 0x0f) + return -EINVAL; + iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid; + + if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS) + return -EINVAL; + iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis; - for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) { + for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) if (sa->iso_bc->bc_bis[i] < 0x01 || sa->iso_bc->bc_bis[i] > 0x1f) return -EINVAL; - memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, - iso_pi(sk)->bc_num_bis); - } + memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, + iso_pi(sk)->bc_num_bis); return 0; } +static int iso_sock_bind_pa_sk(struct sock *sk, struct sockaddr_iso *sa, + int addr_len) +{ + int err = 0; + + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } + + if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc)) { + err = -EINVAL; + goto done; + } + + if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS) { + err = -EINVAL; + goto done; + } + + iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis; + + for (int i = 0; i < iso_pi(sk)->bc_num_bis; i++) + if (sa->iso_bc->bc_bis[i] < 0x01 || + sa->iso_bc->bc_bis[i] > 0x1f) { + err = -EINVAL; + goto done; + } + + memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, + iso_pi(sk)->bc_num_bis); + +done: + return err; +} + static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -828,6 +981,15 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow the user to bind a PA sync socket to a number + * of BISes to sync to. + */ + if (sk->sk_state == BT_CONNECT2 && + test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + err = iso_sock_bind_pa_sk(sk, sa, addr_len); + goto done; + } + if (sk->sk_state != BT_OPEN) { err = -EBADFD; goto done; @@ -1694,6 +1856,7 @@ static void iso_conn_ready(struct iso_conn *conn) parent->sk_data_ready(parent); release_sock(parent); + sock_put(parent); } } @@ -1759,9 +1922,20 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) /* Try to get PA sync listening socket, if it exists */ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_pa_sync_flag, NULL); - if (!sk) + + if (!sk) { sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sync_handle, ev2); + + /* If PA Sync is in process of terminating, + * do not handle any more BIGInfo adv reports. + */ + + if (sk && test_bit(BT_SK_PA_SYNC_TERM, + &iso_pi(sk)->flags)) + return lm; + } + if (sk) { int err; @@ -1778,6 +1952,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (err) { bt_dev_err(hdev, "hci_le_big_create_sync: %d", err); + sock_put(sk); sk = NULL; } } @@ -1810,6 +1985,8 @@ done: if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) *flags |= HCI_PROTO_DEFER; + sock_put(sk); + return lm; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 17ca13e8c044..60298975d5c4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6492,6 +6492,14 @@ drop: kfree_skb(skb); } +static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) +{ + struct l2cap_cmd_rej_unk rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -6517,23 +6525,25 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); - break; + l2cap_sig_send_rej(conn, cmd->ident); + skb_pull(skb, len > skb->len ? skb->len : len); + continue; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { - struct l2cap_cmd_rej_unk rej; - BT_ERR("Wrong link type (%d)", err); - - rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); + l2cap_sig_send_rej(conn, cmd->ident); } skb_pull(skb, len); } + if (skb->len > 0) { + BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, 0); + } + drop: kfree_skb(skb); } diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index 53a796ac078c..43aa01fd07b9 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -30,6 +30,15 @@ #include <net/bluetooth/bluetooth.h> +/** + * baswap() - Swaps the order of a bd address + * @dst: Pointer to a bdaddr_t struct that will store the swapped + * bd address. + * @src: Pointer to the bdaddr_t struct to be swapped. + * + * This function reverses the byte order of a Bluetooth device + * address. + */ void baswap(bdaddr_t *dst, const bdaddr_t *src) { const unsigned char *s = (const unsigned char *)src; @@ -41,7 +50,19 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src) } EXPORT_SYMBOL(baswap); -/* Bluetooth error codes to Unix errno mapping */ +/** + * bt_to_errno() - Bluetooth error codes to standard errno + * @code: Bluetooth error code to be converted + * + * This function takes a Bluetooth error code as input and convets + * it to an equivalent Unix/standard errno value. + * + * Return: + * + * If the bt error code is known, an equivalent Unix errno value + * is returned. + * If the given bt error code is not known, ENOSYS is returned. + */ int bt_to_errno(__u16 code) { switch (code) { @@ -135,10 +156,22 @@ int bt_to_errno(__u16 code) } EXPORT_SYMBOL(bt_to_errno); -/* Unix errno to Bluetooth error codes mapping */ +/** + * bt_status() - Standard errno value to Bluetooth error code + * @err: Unix/standard errno value to be converted + * + * This function converts a standard/Unix errno value to an + * equivalent Bluetooth error code. + * + * Return: Bluetooth error code. + * + * If the given errno is not found, 0x1f is returned by default + * which indicates an unspecified error. + * For err >= 0, no conversion is performed, and the same value + * is immediately returned. + */ __u8 bt_status(int err) { - /* Don't convert if already positive value */ if (err >= 0) return err; @@ -206,6 +239,10 @@ __u8 bt_status(int err) } EXPORT_SYMBOL(bt_status); +/** + * bt_info() - Log Bluetooth information message + * @format: Message's format string + */ void bt_info(const char *format, ...) { struct va_format vaf; @@ -222,6 +259,10 @@ void bt_info(const char *format, ...) } EXPORT_SYMBOL(bt_info); +/** + * bt_warn() - Log Bluetooth warning message + * @format: Message's format string + */ void bt_warn(const char *format, ...) { struct va_format vaf; @@ -238,6 +279,10 @@ void bt_warn(const char *format, ...) } EXPORT_SYMBOL(bt_warn); +/** + * bt_err() - Log Bluetooth error message + * @format: Message's format string + */ void bt_err(const char *format, ...) { struct va_format vaf; @@ -267,6 +312,10 @@ bool bt_dbg_get(void) return debug_enable; } +/** + * bt_dbg() - Log Bluetooth debugging message + * @format: Message's format string + */ void bt_dbg(const char *format, ...) { struct va_format vaf; @@ -287,6 +336,13 @@ void bt_dbg(const char *format, ...) EXPORT_SYMBOL(bt_dbg); #endif +/** + * bt_warn_ratelimited() - Log rate-limited Bluetooth warning message + * @format: Message's format string + * + * This functions works like bt_warn, but it uses rate limiting + * to prevent the message from being logged too often. + */ void bt_warn_ratelimited(const char *format, ...) { struct va_format vaf; @@ -303,6 +359,13 @@ void bt_warn_ratelimited(const char *format, ...) } EXPORT_SYMBOL(bt_warn_ratelimited); +/** + * bt_err_ratelimited() - Log rate-limited Bluetooth error message + * @format: Message's format string + * + * This functions works like bt_err, but it uses rate limiting + * to prevent the message from being logged too often. + */ void bt_err_ratelimited(const char *format, ...) { struct va_format vaf; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba2e00646e8e..bb72ff6eb22f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2897,7 +2897,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) + /* Considering SMP over BREDR/LE, there is no need to check addr_type */ + if (key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7130,6 +7131,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; + u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7139,8 +7141,12 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (irk->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_irk(hdev, &irk->addr.bdaddr, - le_addr_type(irk->addr.type), irk->val, + addr_type, irk->val, BDADDR_ANY); } @@ -7221,6 +7227,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; + u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7255,8 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (key->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_ltk(hdev, &key->addr.bdaddr, - le_addr_type(key->addr.type), type, authenticated, + addr_type, type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9523,7 +9534,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = BDADDR_BREDR; + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9574,7 +9585,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; @@ -9603,7 +9614,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9632,7 +9643,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); @@ -10134,21 +10145,6 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } -static void restart_le_scan(struct hci_dev *hdev) -{ - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, - hdev->discovery.scan_start + - hdev->discovery.scan_duration)) - return; - - queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart, - DISCOV_LE_RESTART_DELAY); -} - static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { @@ -10183,8 +10179,6 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, * scanning to ensure updated result with updated RSSI values. */ if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { - restart_le_scan(hdev); - /* Validate RSSI value against the RSSI threshold once more. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && rssi < hdev->discovery.rssi) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5f2f97de295e..1e7ea3a4b7ef 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1059,6 +1059,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { + smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1078,24 +1079,28 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { + smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { + smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { + smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { + smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1115,6 +1120,8 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { + key->link_type = hcon->type; + key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c index 5c4c369f8536..2faab44652e7 100644 --- a/net/bridge/br_cfm_netlink.c +++ b/net/bridge/br_cfm_netlink.c @@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, memset(&tx_info, 0, sizeof(tx_info)); - instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]); nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC], sizeof(tx_info.dmac.addr)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8f40de3af154..65cee0ad3c1b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -471,6 +471,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_get = br_fdb_get, .ndo_mdb_add = br_mdb_add, .ndo_mdb_del = br_mdb_del, + .ndo_mdb_del_bulk = br_mdb_del_bulk, .ndo_mdb_dump = br_mdb_dump, .ndo_mdb_get = br_mdb_get, .ndo_bridge_getlink = br_getlink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 8cc526067bc2..bc37e47ad829 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1412,6 +1412,139 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[], return err; } +struct br_mdb_flush_desc { + u32 port_ifindex; + u16 vid; + u8 rt_protocol; + u8 state; + u8 state_mask; +}; + +static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), + [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT), +}; + +static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + int err; + + desc->port_ifindex = entry->ifindex; + desc->vid = entry->vid; + desc->state = entry->state; + + if (!tb[MDBA_SET_ENTRY_ATTRS]) + return 0; + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, + tb[MDBA_SET_ENTRY_ATTRS], + br_mdbe_attrs_del_bulk_pol, extack); + if (err) + return err; + + if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) + desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]); + + if (mdbe_attrs[MDBE_ATTR_RTPROT]) + desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); + + return 0; +} + +static void br_mdb_flush_host(struct net_bridge *br, + struct net_bridge_mdb_entry *mp, + const struct br_mdb_flush_desc *desc) +{ + u8 state; + + if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex) + return; + + if (desc->rt_protocol) + return; + + state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0; + if (desc->state_mask && (state & desc->state_mask) != desc->state) + return; + + br_multicast_host_leave(mp, true); + if (!mp->ports && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); +} + +static void br_mdb_flush_pgs(struct net_bridge *br, + struct net_bridge_mdb_entry *mp, + const struct br_mdb_flush_desc *desc) +{ + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) { + u8 state; + + if (desc->port_ifindex && + desc->port_ifindex != p->key.port->dev->ifindex) { + pp = &p->next; + continue; + } + + if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) { + pp = &p->next; + continue; + } + + state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0; + if (desc->state_mask && + (state & desc->state_mask) != desc->state) { + pp = &p->next; + continue; + } + + br_multicast_del_pg(mp, p, pp); + } +} + +static void br_mdb_flush(struct net_bridge *br, + const struct br_mdb_flush_desc *desc) +{ + struct net_bridge_mdb_entry *mp; + + spin_lock_bh(&br->multicast_lock); + + /* Safe variant is not needed because entries are removed from the list + * upon group timer expiration or bridge deletion. + */ + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { + if (desc->vid && desc->vid != mp->addr.vid) + continue; + + br_mdb_flush_host(br, mp, desc); + br_mdb_flush_pgs(br, mp, desc); + } + + spin_unlock_bh(&br->multicast_lock); +} + +int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(dev); + struct br_mdb_flush_desc desc = {}; + int err; + + err = br_mdb_flush_desc_init(&desc, tb, extack); + if (err) + return err; + + br_mdb_flush(br, &desc); + + return 0; +} + static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 051ea81864ac..b0a92c344722 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1022,6 +1022,8 @@ int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); +int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, @@ -1430,6 +1432,12 @@ static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], return -EOPNOTSUPP; } +static inline int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { diff --git a/net/core/dev.c b/net/core/dev.c index 0432b04cf9b0..f01a9b858347 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3471,6 +3471,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + return features & ~NETIF_F_GSO_MASK; + if (!skb_shinfo(skb)->gso_type) { skb_warn_bad_offload(skb); return features & ~NETIF_F_GSO_MASK; @@ -3753,6 +3756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); + tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP); + if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && qdisc_run_begin(q)) { @@ -3782,7 +3787,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, no_lock_out: if (unlikely(to_free)) kfree_skb_list_reason(to_free, - SKB_DROP_REASON_QDISC_DROP); + tcf_get_drop_reason(to_free)); return rc; } @@ -3837,7 +3842,8 @@ no_lock_out: } spin_unlock(root_lock); if (unlikely(to_free)) - kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP); + kfree_skb_list_reason(to_free, + tcf_get_drop_reason(to_free)); if (unlikely(contended)) spin_unlock(&q->busylock); return rc; @@ -3923,14 +3929,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; - res.drop_reason = *drop_reason; + tcf_set_drop_reason(skb, *drop_reason); mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. */ switch (ret) { case TC_ACT_SHOT: - *drop_reason = res.drop_reason; + *drop_reason = tcf_get_drop_reason(skb); mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -11620,6 +11626,7 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom); @@ -11633,7 +11640,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); @@ -11660,7 +11667,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); } /* diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index b240d9aae4a6..b0f221d658be 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -183,7 +183,7 @@ out: } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", .cap_sys_admin = 1 }, + { .name = "events", .flags = GENL_MCAST_CAP_SYS_ADMIN, }, }; static void send_dm_alert(struct work_struct *work) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 75282222e0b4..96622bfb838a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -53,7 +53,7 @@ bool fib_rule_matchall(const struct fib_rule *rule) EXPORT_SYMBOL_GPL(fib_rule_matchall); int fib_default_rule_add(struct fib_rules_ops *ops, - u32 pref, u32 table, u32 flags) + u32 pref, u32 table) { struct fib_rule *r; @@ -65,7 +65,6 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; - r->flags = flags; r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; diff --git a/net/core/filter.c b/net/core/filter.c index 4ff6100c6a27..24061f29c9dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,7 +87,7 @@ #include "dev.h" static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); +bpf_sk_base_func_proto(enum bpf_func_id func_id); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) { @@ -203,7 +203,7 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) return 0; nla = (struct nlattr *) &skb->data[a]; - if (nla->nla_len > skb->len - a) + if (!nla_ok(nla, skb->len - a)) return 0; nla = nla_find_nested(nla, x); @@ -7862,7 +7862,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -7955,7 +7955,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -7974,7 +7974,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8161,7 +8161,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8220,7 +8220,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) @@ -8281,7 +8281,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8323,7 +8323,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8367,7 +8367,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_lookup_tcp_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8378,7 +8378,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_load_bytes: return &bpf_flow_dissector_load_bytes_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8405,7 +8405,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8580,7 +8580,7 @@ static bool cg_skb_is_valid_access(int off, int size, return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; } @@ -8592,7 +8592,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; default: @@ -11236,7 +11236,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -11418,7 +11418,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_sk_release: return &bpf_sk_release_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -11752,7 +11752,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = { }; static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_sk_base_func_proto(enum bpf_func_id func_id) { const struct bpf_func_proto *func; @@ -11781,10 +11781,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; return func; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 57cea67b7562..ea55a758a475 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3669,10 +3669,8 @@ static int pktgen_thread_worker(void *arg) if (unlikely(!pkt_dev && t->control == 0)) { if (t->net->pktgen_exiting) break; - wait_event_interruptible_timeout(t->queue, - t->control != 0, - HZ/10); - try_to_freeze(); + wait_event_freezable_timeout(t->queue, + t->control != 0, HZ / 10); continue; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 94c4572512b8..5f6ed6da3cfc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6410,17 +6410,64 @@ static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); } +static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + struct br_mdb_entry zero_entry = {}; + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be set"); + return -EINVAL; + } + + if (entry->vid >= VLAN_N_VID - 1) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) { + NL_SET_ERR_MSG(extack, "Entry address cannot be set"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_del_bulk, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; struct net *net = sock_net(skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (!del_bulk) + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, + extack); + else + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, + mdba_del_bulk_policy, extack); if (err) return err; @@ -6441,6 +6488,14 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (del_bulk) { + if (!dev->netdev_ops->ndo_mdb_del_bulk) { + NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion"); + return -EOPNOTSUPP; + } + return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack); + } + if (!dev->netdev_ops->ndo_mdb_del) { NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); return -EOPNOTSUPP; @@ -6686,5 +6741,6 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, + RTNL_FLAG_BULK_DEL_SUPPORTED); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4d4b11b0a83d..12d22c0b8551 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4861,7 +4861,9 @@ static __always_inline unsigned int skb_ext_total_length(void) static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); +#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); +#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), @@ -5993,6 +5995,31 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) } EXPORT_SYMBOL(skb_ensure_writable); +int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev) +{ + int needed_headroom = dev->needed_headroom; + int needed_tailroom = dev->needed_tailroom; + + /* For tail taggers, we need to pad short frames ourselves, to ensure + * that the tail tag does not fail at its role of being at the end of + * the packet, once the conduit interface pads the frame. Account for + * that pad length here, and pad later. + */ + if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) + needed_tailroom += ETH_ZLEN - skb->len; + /* skb_headroom() returns unsigned int... */ + needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); + needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); + + if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) + /* No reallocation needed, yay! */ + return 0; + + return pskb_expand_head(skb, needed_headroom, needed_tailroom, + GFP_ATOMIC); +} +EXPORT_SYMBOL(skb_ensure_writable_head_tail); + /* remove VLAN header from packet and update csum accordingly. * expects a non skb_vlan_tag_present skb with a vlan tag payload */ diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4292c2ed1828..27d733c0f65e 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -536,6 +536,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_stream_unix(sk)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } diff --git a/net/core/stream.c b/net/core/stream.c index 96fbcb9bbb30..b16dfa568a2d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -79,7 +79,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); - return 0; + return done < 0 ? done : 0; } EXPORT_SYMBOL(sk_stream_wait_connect); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 06d7324276ec..ded07e09f813 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -889,7 +889,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); + net_dbg_ratelimited("connect: ipv4 mapped\n"); if (ipv6_only_sock(sk)) return -ENETUNREACH; diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 918a0395b03e..19dbf540748a 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -202,7 +202,10 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); + WARN_ON(!devl_is_registered(devlink)); + + if (!devlink_nl_notify_need(devlink)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -214,8 +217,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info) @@ -999,7 +1001,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1010,8 +1012,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, if (err) goto out_free_msg; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); return; out_free_msg: diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 5ea2e2012e93..c7a8e13f917c 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -91,10 +91,15 @@ extern struct genl_family devlink_nl_family; struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); +static inline bool __devl_is_registered(struct devlink *devlink) +{ + return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); +} + static inline bool devl_is_registered(struct devlink *devlink) { devl_assert_locked(devlink); - return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + return __devl_is_registered(devlink); } static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) @@ -180,6 +185,58 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); +static inline bool devlink_nl_notify_need(struct devlink *devlink) +{ + return genl_has_listeners(&devlink_nl_family, devlink_net(devlink), + DEVLINK_MCGRP_CONFIG); +} + +struct devlink_obj_desc { + struct rcu_head rcu; + const char *bus_name; + const char *dev_name; + unsigned int port_index; + bool port_index_valid; + long data[]; +}; + +static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, + struct devlink *devlink) +{ + memset(desc, 0, sizeof(*desc)); + desc->bus_name = devlink->dev->bus->name; + desc->dev_name = dev_name(devlink->dev); +} + +static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, + struct devlink_port *devlink_port) +{ + desc->port_index = devlink_port->index; + desc->port_index_valid = true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data); + +static inline void devlink_nl_notify_send_desc(struct devlink *devlink, + struct sk_buff *msg, + struct devlink_obj_desc *desc) +{ + genlmsg_multicast_netns_filtered(&devlink_nl_family, + devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, + GFP_KERNEL, + devlink_nl_notify_filter, desc); +} + +static inline void devlink_nl_notify_send(struct devlink *devlink, + struct sk_buff *msg) +{ + struct devlink_obj_desc desc; + + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_notify_send_desc(devlink, msg, &desc); +} + /* Notify */ void devlink_notify_register(struct devlink *devlink); void devlink_notify_unregister(struct devlink *devlink); diff --git a/net/devlink/health.c b/net/devlink/health.c index 71ae121dc739..acb8c0e174bb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -490,12 +490,16 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, enum devlink_command cmd) { struct devlink *devlink = reporter->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); ASSERT_DEVLINK_REGISTERED(devlink); + if (!devlink_nl_notify_need(devlink)) + return; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -506,8 +510,10 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_obj_desc_init(&desc, devlink); + if (reporter->devlink_port) + devlink_nl_obj_desc_port_set(&desc, reporter->devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } void diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 2f1c317b64cd..67f70a621d27 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -136,7 +136,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard, WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW && cmd != DEVLINK_CMD_LINECARD_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -150,8 +150,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_linecards_notify_register(struct devlink *devlink) diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fa9afe3e6d9b..499885c8b9ca 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -17,6 +17,119 @@ static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; +struct devlink_nl_sock_priv { + struct devlink_obj_desc __rcu *flt; + spinlock_t flt_lock; /* Protects flt. */ +}; + +static void devlink_nl_sock_priv_init(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + + spin_lock_init(&sk_priv->flt_lock); +} + +static void devlink_nl_sock_priv_destroy(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + struct devlink_obj_desc *flt; + + flt = rcu_dereference_protected(sk_priv->flt, true); + kfree_rcu(flt, rcu); +} + +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_nl_sock_priv *sk_priv; + struct nlattr **attrs = info->attrs; + struct devlink_obj_desc *flt; + size_t data_offset = 0; + size_t data_size = 0; + char *pos; + + if (attrs[DEVLINK_ATTR_BUS_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_BUS_NAME]) + 1); + if (attrs[DEVLINK_ATTR_DEV_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_DEV_NAME]) + 1); + + flt = kzalloc(size_add(sizeof(*flt), data_size), GFP_KERNEL); + if (!flt) + return -ENOMEM; + + pos = (char *) flt->data; + if (attrs[DEVLINK_ATTR_BUS_NAME]) { + data_offset += nla_strscpy(pos, + attrs[DEVLINK_ATTR_BUS_NAME], + data_size) + 1; + flt->bus_name = pos; + pos += data_offset; + } + if (attrs[DEVLINK_ATTR_DEV_NAME]) { + nla_strscpy(pos, attrs[DEVLINK_ATTR_DEV_NAME], + data_size - data_offset); + flt->dev_name = pos; + } + + if (attrs[DEVLINK_ATTR_PORT_INDEX]) { + flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); + flt->port_index_valid = true; + } + + /* Don't attach empty filter. */ + if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) { + kfree(flt); + flt = NULL; + } + + sk_priv = genl_sk_priv_get(&devlink_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) { + kfree(flt); + return PTR_ERR(sk_priv); + } + spin_lock(&sk_priv->flt_lock); + flt = rcu_replace_pointer(sk_priv->flt, flt, + lockdep_is_held(&sk_priv->flt_lock)); + spin_unlock(&sk_priv->flt_lock); + kfree_rcu(flt, rcu); + return 0; +} + +static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, + const struct devlink_obj_desc *flt) +{ + if (desc->bus_name && flt->bus_name && + strcmp(desc->bus_name, flt->bus_name)) + return false; + if (desc->dev_name && flt->dev_name && + strcmp(desc->dev_name, flt->dev_name)) + return false; + if (desc->port_index_valid && flt->port_index_valid && + desc->port_index != flt->port_index) + return false; + return true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct devlink_obj_desc *desc = data; + struct devlink_nl_sock_priv *sk_priv; + struct devlink_obj_desc *flt; + int ret = 0; + + rcu_read_lock(); + sk_priv = __genl_sk_priv_get(&devlink_nl_family, dsk); + if (!IS_ERR_OR_NULL(sk_priv)) { + flt = rcu_dereference(sk_priv->flt); + if (flt) + ret = !devlink_obj_desc_match(desc, flt); + } + rcu_read_unlock(); + return ret; +} + int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype) { @@ -256,4 +369,7 @@ struct genl_family devlink_nl_family __ro_after_init = { .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, .mcgrps = devlink_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), + .sock_priv_size = sizeof(struct devlink_nl_sock_priv), + .sock_priv_init = devlink_nl_sock_priv_init, + .sock_priv_destroy = devlink_nl_sock_priv_destroy, }; diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 95f9b4350ab7..c81cf2dd154f 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -560,8 +560,15 @@ static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELF [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), }; +/* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[73] = { +const struct genl_split_ops devlink_nl_ops[74] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -1233,4 +1240,11 @@ const struct genl_split_ops devlink_nl_ops[73] = { .maxattr = DEVLINK_ATTR_SELFTESTS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, + .doit = devlink_nl_notify_filter_set_doit, + .policy = devlink_notify_filter_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_CMD_CAP_DO, + }, }; diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 02f3c0bfae0e..8f2bd50ddf5e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -16,7 +16,7 @@ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_F extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[73]; +extern const struct genl_split_ops devlink_nl_ops[74]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -142,5 +142,7 @@ int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info); #endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/net/devlink/param.c b/net/devlink/param.c index d74df09311a9..22bc3b500518 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -343,7 +343,7 @@ static void devlink_param_notify(struct devlink *devlink, * will replay the notifications if the params are added/removed * outside of the lifetime of the instance. */ - if (!devl_is_registered(devlink)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -356,8 +356,7 @@ static void devlink_param_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } static void devlink_params_notify(struct devlink *devlink, diff --git a/net/devlink/port.c b/net/devlink/port.c index 7634f187fa50..62e54e152ecf 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -507,12 +507,13 @@ static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { struct devlink *devlink = devlink_port->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -525,8 +526,9 @@ static void devlink_port_notify(struct devlink_port *devlink_port, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_obj_desc_port_set(&desc, devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } static void devlink_ports_notify(struct devlink *devlink, diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 94b289b93ff2..7139e67e93ae 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -146,7 +146,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -159,8 +159,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_rates_notify_register(struct devlink *devlink) diff --git a/net/devlink/region.c b/net/devlink/region.c index e3bab458db94..7319127c5913 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -234,15 +234,15 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_regions_notify_register(struct devlink *devlink) diff --git a/net/devlink/trap.c b/net/devlink/trap.c index c26313e7ca08..5d18c7424df1 100644 --- a/net/devlink/trap.c +++ b/net/devlink/trap.c @@ -1173,7 +1173,8 @@ devlink_trap_group_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && cmd != DEVLINK_CMD_TRAP_GROUP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1187,8 +1188,7 @@ devlink_trap_group_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_trap_groups_notify_register(struct devlink *devlink) @@ -1234,7 +1234,8 @@ static void devlink_trap_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && cmd != DEVLINK_CMD_TRAP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1247,8 +1248,7 @@ static void devlink_trap_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_traps_notify_register(struct devlink *devlink) @@ -1710,7 +1710,8 @@ devlink_trap_policer_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW && cmd != DEVLINK_CMD_TRAP_POLICER_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1724,8 +1725,7 @@ devlink_trap_policer_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_trap_policers_notify_register(struct devlink *devlink) diff --git a/net/dsa/user.c b/net/dsa/user.c index d438884a4eb0..b738a466e2dc 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -920,30 +920,6 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) } EXPORT_SYMBOL_GPL(dsa_enqueue_skb); -static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) -{ - int needed_headroom = dev->needed_headroom; - int needed_tailroom = dev->needed_tailroom; - - /* For tail taggers, we need to pad short frames ourselves, to ensure - * that the tail tag does not fail at its role of being at the end of - * the packet, once the conduit interface pads the frame. Account for - * that pad length here, and pad later. - */ - if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) - needed_tailroom += ETH_ZLEN - skb->len; - /* skb_headroom() returns unsigned int... */ - needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); - needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); - - if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) - /* No reallocation needed, yay! */ - return 0; - - return pskb_expand_head(skb, needed_headroom, needed_tailroom, - GFP_ATOMIC); -} - static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_user_priv *p = netdev_priv(dev); @@ -956,13 +932,14 @@ static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev) /* Handle tx timestamp if any */ dsa_skb_tx_timestamp(p, skb); - if (dsa_realloc_skb(skb, dev)) { + if (skb_ensure_writable_head_tail(skb, dev)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } /* needed_tailroom should still be 'warm' in the cache line from - * dsa_realloc_skb(), which has also ensured that padding is safe. + * skb_ensure_writable_head_tail(), which has also ensured that + * padding is safe. */ if (dev->needed_tailroom) eth_skb_pad(skb); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 86d47425038b..4bc9a2a07bbb 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -973,32 +973,35 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rxfh_param rxfh = {}; struct ethtool_rxnfc info; size_t info_size = sizeof(info); int rc; - if (!ops->set_rxnfc || !ops->get_rxfh) + if (!ops->set_rxnfc) return -EOPNOTSUPP; rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); if (rc) return rc; - rc = ops->get_rxfh(dev, &rxfh); - if (rc) - return rc; + if (ops->get_rxfh) { + struct ethtool_rxfh_param rxfh = {}; - /* Sanity check: if symmetric-xor is set, then: - * 1 - no other fields besides IP src/dst and/or L4 src/dst - * 2 - If src is set, dst must also be set - */ - if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && - ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) || - (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || - (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) - return -EINVAL; + rc = ops->get_rxfh(dev, &rxfh); + if (rc) + return rc; + + /* Sanity check: if symmetric-xor is set, then: + * 1 - no other fields besides IP src/dst and/or L4 src/dst + * 2 - If src is set, dst must also be set + */ + if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) || + (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || + (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) + return -EINVAL; + } rc = ops->set_rxnfc(dev, &info); if (rc) @@ -1252,6 +1255,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) { ret = -EFAULT; } else if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, input_xfrm), + &rxfh_dev.input_xfrm, + sizeof(rxfh.input_xfrm))) { + ret = -EFAULT; + } else if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_config[0]), rss_config, total_size)) { ret = -EFAULT; @@ -1299,14 +1307,16 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key or function. + * Must request at least one change: indir size, hash key, function + * or input transformation. */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || (rxfh.key_size && (rxfh.key_size != dev_key_size)) || (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && - rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE)) + rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && + rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index f05b7bdae2aa..7bce67673e83 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o obj-y += 6lowpan/ ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ - header_ops.o sysfs.o nl802154.o trace.o + header_ops.o sysfs.o nl802154.o trace.o pan.o ieee802154_socket-y := socket.o CFLAGS_trace.o := -I$(src) diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 57546e07e06a..60e8fff1347e 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -198,6 +198,25 @@ void wpan_phy_free(struct wpan_phy *phy) } EXPORT_SYMBOL(wpan_phy_free); +static void cfg802154_free_peer_structures(struct wpan_dev *wpan_dev) +{ + struct ieee802154_pan_device *child, *tmp; + + mutex_lock(&wpan_dev->association_lock); + + kfree(wpan_dev->parent); + wpan_dev->parent = NULL; + + list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) { + list_del(&child->node); + kfree(child); + } + + wpan_dev->nchildren = 0; + + mutex_unlock(&wpan_dev->association_lock); +} + int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, struct net *net) { @@ -276,6 +295,9 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; + mutex_init(&wpan_dev->association_lock); + INIT_LIST_HEAD(&wpan_dev->children); + wpan_dev->max_associations = SZ_16K; wpan_dev->netdev = dev; break; @@ -291,6 +313,8 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, rdev->opencount++; break; case NETDEV_UNREGISTER: + cfg802154_free_peer_structures(wpan_dev); + /* It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check * that the interface is still on the list diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 1a265a421308..7eb37de3add2 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -234,6 +234,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { NL802154_SCAN_DONE_REASON_ABORTED), [NL802154_ATTR_BEACON_INTERVAL] = NLA_POLICY_MAX(NLA_U8, IEEE802154_ACTIVE_SCAN_DURATION), + [NL802154_ATTR_MAX_ASSOCIATIONS] = { .type = NLA_U32 }, + [NL802154_ATTR_PEER] = { .type = NLA_NESTED }, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, @@ -248,7 +250,6 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; -#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static int nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -307,7 +308,6 @@ nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev) { rtnl_unlock(); } -#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ /* message building helper */ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, @@ -1087,15 +1087,14 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]); - /* TODO - * I am not sure about to check here on broadcast pan_id. - * Broadcast is a valid setting, comment from 802.15.4: - * If this value is 0xffff, the device is not associated. - * - * This could useful to simple deassociate an device. + /* Only allow changing the PAN ID when the device has no more + * associations ongoing to avoid confusing peers. */ - if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)) + if (cfg802154_device_is_associated(wpan_dev)) { + NL_SET_ERR_MSG(info->extack, + "Existing associations, changing PAN ID forbidden"); return -EINVAL; + } return rdev_set_pan_id(rdev, wpan_dev, pan_id); } @@ -1123,20 +1122,17 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info) short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]); - /* TODO - * I am not sure about to check here on broadcast short_addr. - * Broadcast is a valid setting, comment from 802.15.4: - * A value of 0xfffe indicates that the device has - * associated but has not been allocated an address. A - * value of 0xffff indicates that the device does not - * have a short address. - * - * I think we should allow to set these settings but - * don't allow to allow socket communication with it. + /* The short address only has a meaning when part of a PAN, after a + * proper association procedure. However, we want to still offer the + * possibility to create static networks so changing the short address + * is only allowed when not already associated to other devices with + * the official handshake. */ - if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) || - short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)) + if (cfg802154_device_is_associated(wpan_dev)) { + NL_SET_ERR_MSG(info->extack, + "Existing associations, changing short address forbidden"); return -EINVAL; + } return rdev_set_short_addr(rdev, wpan_dev, short_addr); } @@ -1638,6 +1634,189 @@ nl802154_stop_beacons(struct sk_buff *skb, struct genl_info *info) return rdev_stop_beacons(rdev, wpan_dev); } +static int nl802154_associate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev; + struct wpan_phy *wpan_phy; + struct ieee802154_addr coord; + int err; + + wpan_dev = dev->ieee802154_ptr; + wpan_phy = &rdev->wpan_phy; + + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + + if (!info->attrs[NL802154_ATTR_PAN_ID] || + !info->attrs[NL802154_ATTR_EXTENDED_ADDR]) + return -EINVAL; + + coord.pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]); + coord.mode = IEEE802154_ADDR_LONG; + coord.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]); + + mutex_lock(&wpan_dev->association_lock); + err = rdev_associate(rdev, wpan_dev, &coord); + mutex_unlock(&wpan_dev->association_lock); + if (err) + pr_err("Association with PAN ID 0x%x failed (%d)\n", + le16_to_cpu(coord.pan_id), err); + + return err; +} + +static int nl802154_disassociate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct wpan_phy *wpan_phy = &rdev->wpan_phy; + struct ieee802154_addr target; + + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + + target.pan_id = wpan_dev->pan_id; + + if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) { + target.mode = IEEE802154_ADDR_LONG; + target.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]); + } else if (info->attrs[NL802154_ATTR_SHORT_ADDR]) { + target.mode = IEEE802154_ADDR_SHORT; + target.short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]); + } else { + NL_SET_ERR_MSG(info->extack, "Device address is missing"); + return -EINVAL; + } + + mutex_lock(&wpan_dev->association_lock); + rdev_disassociate(rdev, wpan_dev, &target); + mutex_unlock(&wpan_dev->association_lock); + + return 0; +} + +static int nl802154_set_max_associations(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + unsigned int max_assoc; + + if (!info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]) { + NL_SET_ERR_MSG(info->extack, "No maximum number of association given"); + return -EINVAL; + } + + max_assoc = nla_get_u32(info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]); + + mutex_lock(&wpan_dev->association_lock); + cfg802154_set_max_associations(wpan_dev, max_assoc); + mutex_unlock(&wpan_dev->association_lock); + + return 0; +} + +static int nl802154_send_peer_info(struct sk_buff *msg, + struct netlink_callback *cb, + u32 seq, int flags, + struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_pan_device *peer, + enum nl802154_peer_type type) +{ + struct nlattr *nla; + void *hdr; + + ASSERT_RTNL(); + + hdr = nl802154hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, + NL802154_CMD_LIST_ASSOCIATIONS); + if (!hdr) + return -ENOBUFS; + + genl_dump_check_consistent(cb, hdr); + + nla = nla_nest_start_noflag(msg, NL802154_ATTR_PEER); + if (!nla) + goto nla_put_failure; + + if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_PEER_TYPE, type)) + goto nla_put_failure; + + if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_MODE, peer->mode)) + goto nla_put_failure; + + if (nla_put(msg, NL802154_DEV_ADDR_ATTR_SHORT, + IEEE802154_SHORT_ADDR_LEN, &peer->short_addr)) + goto nla_put_failure; + + if (nla_put(msg, NL802154_DEV_ADDR_ATTR_EXTENDED, + IEEE802154_EXTENDED_ADDR_LEN, &peer->extended_addr)) + goto nla_put_failure; + + nla_nest_end(msg, nla); + + genlmsg_end(msg, hdr); + + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl802154_list_associations(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct cfg802154_registered_device *rdev; + struct ieee802154_pan_device *child; + struct wpan_dev *wpan_dev; + int err; + + err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); + if (err) + return err; + + mutex_lock(&wpan_dev->association_lock); + + if (cb->args[2]) + goto out; + + if (wpan_dev->parent) { + err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, rdev, wpan_dev, + wpan_dev->parent, + NL802154_PEER_TYPE_PARENT); + if (err < 0) + goto out_err; + } + + list_for_each_entry(child, &wpan_dev->children, node) { + err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, rdev, wpan_dev, + child, + NL802154_PEER_TYPE_CHILD); + if (err < 0) + goto out_err; + } + + cb->args[2] = 1; +out: + err = skb->len; +out_err: + mutex_unlock(&wpan_dev->association_lock); + + nl802154_finish_wpan_dev_dump(rdev); + + return err; +} + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = { [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 }, @@ -2759,6 +2938,34 @@ static const struct genl_ops nl802154_ops[] = { NL802154_FLAG_CHECK_NETDEV_UP | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_ASSOCIATE, + .doit = nl802154_associate, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_CHECK_NETDEV_UP | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_DISASSOCIATE, + .doit = nl802154_disassociate, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_CHECK_NETDEV_UP | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_SET_MAX_ASSOCIATIONS, + .doit = nl802154_set_max_associations, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_LIST_ASSOCIATIONS, + .dumpit = nl802154_list_associations, + /* can be retrieved by unprivileged users */ + }, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL { .cmd = NL802154_CMD_SET_SEC_PARAMS, diff --git a/net/ieee802154/pan.c b/net/ieee802154/pan.c new file mode 100644 index 000000000000..249df7364b3e --- /dev/null +++ b/net/ieee802154/pan.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IEEE 802.15.4 PAN management + * + * Copyright (C) 2023 Qorvo US, Inc + * Authors: + * - David Girault <david.girault@qorvo.com> + * - Miquel Raynal <miquel.raynal@bootlin.com> + */ + +#include <linux/kernel.h> +#include <net/cfg802154.h> +#include <net/af_ieee802154.h> + +/* Checks whether a device address matches one from the PAN list. + * This helper is meant to be used only during PAN management, when we expect + * extended addresses to be used. + */ +static bool cfg802154_pan_device_is_matching(struct ieee802154_pan_device *pan_dev, + struct ieee802154_addr *ext_dev) +{ + if (!pan_dev || !ext_dev) + return false; + + if (ext_dev->mode == IEEE802154_ADDR_SHORT) + return false; + + return pan_dev->extended_addr == ext_dev->extended_addr; +} + +bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev) +{ + bool is_assoc; + + mutex_lock(&wpan_dev->association_lock); + is_assoc = !list_empty(&wpan_dev->children) || wpan_dev->parent; + mutex_unlock(&wpan_dev->association_lock); + + return is_assoc; +} + +bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + lockdep_assert_held(&wpan_dev->association_lock); + + return cfg802154_pan_device_is_matching(wpan_dev->parent, target); +} +EXPORT_SYMBOL_GPL(cfg802154_device_is_parent); + +struct ieee802154_pan_device * +cfg802154_device_is_child(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + struct ieee802154_pan_device *child; + + lockdep_assert_held(&wpan_dev->association_lock); + + list_for_each_entry(child, &wpan_dev->children, node) + if (cfg802154_pan_device_is_matching(child, target)) + return child; + + return NULL; +} +EXPORT_SYMBOL_GPL(cfg802154_device_is_child); + +__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev) +{ + struct ieee802154_pan_device *child; + __le16 addr; + + lockdep_assert_held(&wpan_dev->association_lock); + + do { + get_random_bytes(&addr, 2); + if (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST) || + addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)) + continue; + + if (wpan_dev->short_addr == addr) + continue; + + if (wpan_dev->parent && wpan_dev->parent->short_addr == addr) + continue; + + list_for_each_entry(child, &wpan_dev->children, node) + if (child->short_addr == addr) + continue; + + break; + } while (1); + + return addr; +} +EXPORT_SYMBOL_GPL(cfg802154_get_free_short_addr); + +unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev, + unsigned int max) +{ + unsigned int old_max; + + lockdep_assert_held(&wpan_dev->association_lock); + + old_max = wpan_dev->max_associations; + wpan_dev->max_associations = max; + + return old_max; +} +EXPORT_SYMBOL_GPL(cfg802154_set_max_associations); diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 5eaae15c610e..64071ef6f57b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -265,6 +265,36 @@ static inline int rdev_stop_beacons(struct cfg802154_registered_device *rdev, return ret; } +static inline int rdev_associate(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord) +{ + int ret; + + if (!rdev->ops->associate) + return -EOPNOTSUPP; + + trace_802154_rdev_associate(&rdev->wpan_phy, wpan_dev, coord); + ret = rdev->ops->associate(&rdev->wpan_phy, wpan_dev, coord); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + +static inline int rdev_disassociate(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + int ret; + + if (!rdev->ops->disassociate) + return -EOPNOTSUPP; + + trace_802154_rdev_disassociate(&rdev->wpan_phy, wpan_dev, target); + ret = rdev->ops->disassociate(&rdev->wpan_phy, wpan_dev, target); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL /* TODO this is already a nl802154, so move into ieee802154 */ static inline void diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index c16db0b326fa..62aa6465253a 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -356,6 +356,44 @@ DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons, TP_ARGS(wpan_phy, wpan_dev) ); +TRACE_EVENT(802154_rdev_associate, + TP_PROTO(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord), + TP_ARGS(wpan_phy, wpan_dev, coord), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(__le64, addr) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->addr = coord->extended_addr; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", associating with: 0x%llx", + WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) +); + +TRACE_EVENT(802154_rdev_disassociate, + TP_PROTO(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target), + TP_ARGS(wpan_phy, wpan_dev, target), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(__le64, addr) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->addr = target->extended_addr; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", disassociating with: 0x%llx", + WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) +); + TRACE_EVENT(802154_rdev_return_int, TP_PROTO(struct wpan_phy *wpan_phy, int ret), TP_ARGS(wpan_phy, ret), diff --git a/net/ife/ife.c b/net/ife/ife.c index 13bbf8cb6a39..be05b690b9ef 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) if (unlikely(!pskb_may_pull(skb, total_pull))) return NULL; + ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len); skb_set_mac_header(skb, total_pull); __skb_pull(skb, total_pull); *metalen = ifehdrln - IFE_METAHDRLEN; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 634cfafa583d..ae8b15e6896f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -191,7 +191,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 513f475c6a53..5bdd1c016009 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -395,13 +395,13 @@ static int fib_default_rules_init(struct fib_rules_ops *ops) { int err; - err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL); if (err < 0) return err; - err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN); if (err < 0) return err; - err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0); + err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT); if (err < 0) return err; return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bd325b029dd1..8e2eb1793685 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -159,8 +159,11 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk) if (sk->sk_family == AF_INET6) { int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - return addr_type != IPV6_ADDR_ANY && - addr_type != IPV6_ADDR_MAPPED; + if (addr_type == IPV6_ADDR_ANY) + return false; + + if (addr_type != IPV6_ADDR_MAPPED) + return true; } #endif return sk->sk_rcv_saddr != htonl(INADDR_ANY); @@ -213,18 +216,9 @@ static bool inet_bhash2_conflict(const struct sock *sk, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { - struct inet_timewait_sock *tw2; struct sock *sk2; - sk_for_each_bound_bhash2(sk2, &tb2->owners) { - if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, - reuseport_cb_ok, reuseport_ok)) - return true; - } - - twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { - sk2 = (struct sock *)tw2; - + sk_for_each_bound(sk2, &tb2->owners) { if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, reuseport_cb_ok, reuseport_ok)) return true; @@ -233,15 +227,20 @@ static bool inet_bhash2_conflict(const struct sock *sk, return false; } +#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \ + hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \ + sk_for_each_bound(sk2, &(__tb2)->owners) + /* This should be called only when the tb and tb2 hashbuckets' locks are held */ static int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, const struct inet_bind2_bucket *tb2, /* may be null */ bool relax, bool reuseport_ok) { - bool reuseport_cb_ok; - struct sock_reuseport *reuseport_cb; kuid_t uid = sock_i_uid((struct sock *)sk); + struct sock_reuseport *reuseport_cb; + bool reuseport_cb_ok; + struct sock *sk2; rcu_read_lock(); reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); @@ -249,32 +248,29 @@ static int inet_csk_bind_conflict(const struct sock *sk, reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); rcu_read_unlock(); - /* - * Unlike other sk lookup places we do not check + /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if + * ipv4) should have been checked already. We need to do these two + * checks separately because their spinlocks have to be acquired/released + * independently of each other, to prevent possible deadlocks + */ + if (inet_use_bhash2_on_bind(sk)) + return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, + reuseport_cb_ok, reuseport_ok); + + /* Unlike other sk lookup places we do not check * for sk_net here, since _all_ the socks listed * in tb->owners and tb2->owners list belong * to the same net - the one this bucket belongs to. */ + sk_for_each_bound_bhash(sk2, tb2, tb) { + if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; - if (!inet_use_bhash2_on_bind(sk)) { - struct sock *sk2; - - sk_for_each_bound(sk2, &tb->owners) - if (inet_bind_conflict(sk, sk2, uid, relax, - reuseport_cb_ok, reuseport_ok) && - inet_rcv_saddr_equal(sk, sk2, true)) - return true; - - return false; + if (inet_rcv_saddr_equal(sk, sk2, true)) + return true; } - /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if - * ipv4) should have been checked already. We need to do these two - * checks separately because their spinlocks have to be acquired/released - * independently of each other, to prevent possible deadlocks - */ - return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok); + return false; } /* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or @@ -457,7 +453,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, kuid_t uid = sock_i_uid(sk); bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->bhash2)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = FASTREUSEPORT_ANY; @@ -549,7 +545,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) } if (!found_port) { - if (!hlist_empty(&tb->owners)) { + if (!hlist_empty(&tb->bhash2)) { if (sk->sk_reuse == SK_FORCE_REUSE || (tb->fastreuse > 0 && reuse) || sk_reuseport_match(tb, sk)) @@ -569,7 +565,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) if (!tb2) { tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, - net, head2, port, l3mdev, sk); + net, head2, tb, sk); if (!tb2) goto fail_unlock; bhash2_created = true; @@ -591,11 +587,10 @@ success: fail_unlock: if (ret) { + if (bhash2_created) + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2); if (bhash_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - if (bhash2_created) - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - tb2); } if (head2_lock_acquired) spin_unlock(&head2->lock); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 46b13962ad02..8e6b6aa0579e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1104,7 +1104,7 @@ resume_bind_walk: if (!net_eq(ib2_net(tb2), net)) continue; - sk_for_each_bound_bhash2(sk, &tb2->owners) { + sk_for_each_bound(sk, &tb2->owners) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9ff201bc4e6d..93e9193df544 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -76,7 +76,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; - INIT_HLIST_HEAD(&tb->owners); + INIT_HLIST_HEAD(&tb->bhash2); hlist_add_head(&tb->node, &head->chain); } return tb; @@ -87,7 +87,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, */ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) { - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->bhash2)) { __hlist_del(&tb->node); kmem_cache_free(cachep, tb); } @@ -100,47 +100,52 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net tb->l3mdev == l3mdev; } -static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, +static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb2, struct net *net, struct inet_bind_hashbucket *head, - unsigned short port, int l3mdev, + struct inet_bind_bucket *tb, const struct sock *sk) { - write_pnet(&tb->ib_net, net); - tb->l3mdev = l3mdev; - tb->port = port; + write_pnet(&tb2->ib_net, net); + tb2->l3mdev = tb->l3mdev; + tb2->port = tb->port; #if IS_ENABLED(CONFIG_IPV6) - tb->family = sk->sk_family; - if (sk->sk_family == AF_INET6) - tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; - else + BUILD_BUG_ON(USHRT_MAX < (IPV6_ADDR_ANY | IPV6_ADDR_MAPPED)); + if (sk->sk_family == AF_INET6) { + tb2->addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + tb2->v6_rcv_saddr = sk->sk_v6_rcv_saddr; + } else { + tb2->addr_type = IPV6_ADDR_MAPPED; + ipv6_addr_set_v4mapped(sk->sk_rcv_saddr, &tb2->v6_rcv_saddr); + } +#else + tb2->rcv_saddr = sk->sk_rcv_saddr; #endif - tb->rcv_saddr = sk->sk_rcv_saddr; - INIT_HLIST_HEAD(&tb->owners); - INIT_HLIST_HEAD(&tb->deathrow); - hlist_add_head(&tb->node, &head->chain); + INIT_HLIST_HEAD(&tb2->owners); + hlist_add_head(&tb2->node, &head->chain); + hlist_add_head(&tb2->bhash_node, &tb->bhash2); } struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - unsigned short port, - int l3mdev, + struct inet_bind_bucket *tb, const struct sock *sk) { - struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); + struct inet_bind2_bucket *tb2 = kmem_cache_alloc(cachep, GFP_ATOMIC); - if (tb) - inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk); + if (tb2) + inet_bind2_bucket_init(tb2, net, head, tb, sk); - return tb; + return tb2; } /* Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) { - if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { + if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); + __hlist_del(&tb->bhash_node); kmem_cache_free(cachep, tb); } } @@ -149,18 +154,11 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb2->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) && - tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; - - return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) && - sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr; - } - if (sk->sk_family == AF_INET6) - return ipv6_addr_equal(&tb2->v6_rcv_saddr, - &sk->sk_v6_rcv_saddr); + return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); + + if (tb2->addr_type != IPV6_ADDR_MAPPED) + return false; #endif return tb2->rcv_saddr == sk->sk_rcv_saddr; } @@ -169,10 +167,9 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port) { inet_sk(sk)->inet_num = port; - sk_add_bind_node(sk, &tb->owners); inet_csk(sk)->icsk_bind_hash = tb; - sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; + sk_add_bind_node(sk, &tb2->owners); } /* @@ -192,21 +189,20 @@ static void __inet_put_port(struct sock *sk) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; - __sk_del_bind_node(sk); inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->inet_num = 0; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_lock(&head2->lock); if (inet_csk(sk)->icsk_bind2_hash) { struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash; - __sk_del_bind2_node(sk); + __sk_del_bind_node(sk); inet_csk(sk)->icsk_bind2_hash = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); } spin_unlock(&head2->lock); + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -275,8 +271,7 @@ bhash2_find: tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child); if (!tb2) { tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep, - net, head2, port, - l3mdev, child); + net, head2, tb, child); if (!tb2) goto error; } @@ -836,16 +831,15 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const return false; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_any(&tb->v6_rcv_saddr) || - ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr); + if (tb->addr_type == IPV6_ADDR_ANY) + return true; + if (tb->addr_type != IPV6_ADDR_MAPPED) return false; - } - if (sk->sk_family == AF_INET6) - return ipv6_addr_any(&tb->v6_rcv_saddr); + if (sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return false; #endif return tb->rcv_saddr == 0; } @@ -942,7 +936,7 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, spin_lock_bh(&head->lock); spin_lock(&head2->lock); - __sk_del_bind2_node(sk); + __sk_del_bind_node(sk); inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); spin_unlock(&head2->lock); @@ -957,10 +951,10 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; - inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk); + inet_bind2_bucket_init(tb2, net, head2, inet_csk(sk)->icsk_bind_hash, sk); } - sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; + sk_add_bind_node(sk, &tb2->owners); spin_unlock(&head2->lock); spin_unlock_bh(&head->lock); @@ -1064,7 +1058,7 @@ other_parity_scan: if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) goto next_port; - WARN_ON(hlist_empty(&tb->owners)); + WARN_ON(hlist_empty(&tb->bhash2)); if (!check_established(death_row, sk, port, &tw)) goto ok; @@ -1104,7 +1098,7 @@ ok: tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net, - head2, port, l3mdev, sk); + head2, tb, sk); if (!tb2) goto error; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index dd37a5bf6881..5befa4de5b24 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -35,13 +35,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, if (!tb) return; - __hlist_del(&tw->tw_bind_node); + __sk_del_bind_node((struct sock *)tw); tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); - - __hlist_del(&tw->tw_bind2_node); tw->tw_tb2 = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); __sock_put((struct sock *)tw); } @@ -94,18 +92,6 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, hlist_nulls_add_head_rcu(&tw->tw_node, list); } -static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind2_node, list); -} - /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -133,11 +119,10 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); - inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); tw->tw_tb2 = icsk->icsk_bind2_hash; WARN_ON(!icsk->icsk_bind2_hash); - inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); + sk_add_bind_node((struct sock *)tw, &tw->tw_tb2->owners); spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0063a237253b..9d6f59531b3a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -253,7 +253,7 @@ static int __net_init ipmr_rules_init(struct net *net) goto err1; } - err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); + err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT); if (err < 0) goto err2; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7c2003833010..7523c4baef35 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -475,11 +475,11 @@ static int __net_init fib6_rules_net_init(struct net *net) if (IS_ERR(ops)) return PTR_ERR(ops); - err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN); if (err) goto out_fib6_rules_ops; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7772f42ff2b9..4fc2cae0d116 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,8 +160,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); - INIT_HLIST_NODE(&f6i->gc_link); - return f6i; } @@ -248,7 +246,6 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); - INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1060,8 +1057,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } - - fib6_clean_expires_locked(rt); } /* @@ -1123,10 +1118,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) - fib6_clean_expires_locked(iter); + fib6_clean_expires(iter); else - fib6_set_expires_locked(iter, - rt->expires); + fib6_set_expires(iter, rt->expires); if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1485,10 +1479,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); - - if (fib6_has_expires(rt)) - hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist); - fib6_start_gc(info->nl_net, rt); } @@ -2291,8 +2281,9 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) +static int fib6_age(struct fib6_info *rt, void *arg) { + struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2300,7 +2291,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Routes are expired even if they are in use. */ - if (fib6_has_expires(rt) && rt->expires) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; @@ -2317,40 +2308,6 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) return 0; } -static void fib6_gc_table(struct net *net, - struct fib6_table *tb6, - struct fib6_gc_args *gc_args) -{ - struct fib6_info *rt; - struct hlist_node *n; - struct nl_info info = { - .nl_net = net, - .skip_notify = false, - }; - - hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) - if (fib6_age(rt, gc_args) == -1) - fib6_del(rt, &info); -} - -static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - spin_lock_bh(&table->tb6_lock); - fib6_gc_table(net, table, gc_args); - spin_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} - void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2366,7 +2323,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_gc_all(net, &gc_args); + fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 30ca064b76ef..9782c180fee6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -242,7 +242,7 @@ static int __net_init ip6mr_rules_init(struct net *net) goto err1; } - err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); + err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); if (err < 0) goto err2; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b132feae3393..ea1dec8448fc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3763,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->dst_nocount = true; if (cfg->fc_flags & RTF_EXPIRES) - fib6_set_expires_locked(rt, jiffies + - clock_t_to_jiffies(cfg->fc_expires)); + fib6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); else - fib6_clean_expires_locked(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 0ed6e34d6edd..6334f64f04d5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -67,7 +67,7 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv) return 0; } -struct bus_type iucv_bus = { +const struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, }; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 65d1f6755f98..1184d40167b8 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -634,7 +634,7 @@ retry: msize = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - msize += skb_shinfo(skb)->frags[i].bv_len; + msize += skb_frag_size(&skb_shinfo(skb)->frags[i]); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e0a4f9eecb2c..489dd97f5172 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1788,10 +1788,10 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, lockdep_is_held(&local->hw.wiphy->mtx)); /* - * If there are no changes, then accept a link that doesn't exist, + * If there are no changes, then accept a link that exist, * unless it's a new link. */ - if (params->link_id < 0 && !new_link && + if (params->link_id >= 0 && !new_link && !params->link_mac && !params->txpwr_set && !params->supported_rates_len && !params->ht_capa && !params->vht_capa && diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 7938ec87ef25..d3820333cd59 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2015 Intel Deutschland GmbH - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <net/mac80211.h> #include "ieee80211_i.h" @@ -589,6 +589,10 @@ int drv_change_sta_links(struct ieee80211_local *local, if (ret) return ret; + /* during reconfig don't add it to debugfs again */ + if (local->in_reconfig) + return 0; + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dbabeefe4515..28bf794f67f8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1068,8 +1068,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_OPEN: if (!matches_local) event = OPN_RJCT; - if (!mesh_plink_free_count(sdata) || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + (sta->mesh->plid && sta->mesh->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; @@ -1077,9 +1077,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_CONFIRM: if (!matches_local) event = CNF_RJCT; - if (!mesh_plink_free_count(sdata) || - sta->mesh->llid != llid || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + sta->mesh->llid != llid || + (sta->mesh->plid && sta->mesh->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; @@ -1247,6 +1247,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, return; } elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL); - mesh_process_plink_frame(sdata, mgmt, elems, rx_status); - kfree(elems); + if (elems) { + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); + } } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5aef4f3dcbf1..073105deb424 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5796,7 +5796,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, { const struct ieee80211_multi_link_elem *ml; const struct element *sub; - size_t ml_len; + ssize_t ml_len; unsigned long removed_links = 0; u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; u8 link_id; @@ -5812,6 +5812,8 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, elems->scratch + elems->scratch_len - elems->scratch_pos, WLAN_EID_FRAGMENT); + if (ml_len < 0) + return; elems->ml_reconf = (const void *)elems->scratch_pos; elems->ml_reconf_len = ml_len; diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 5c3cb019f751..ef7f23af043f 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -315,6 +315,179 @@ static int mac802154_stop_beacons(struct wpan_phy *wpan_phy, return mac802154_stop_beacons_locked(local, sdata); } +static int mac802154_associate(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + u64 ceaddr = swab64((__force u64)coord->extended_addr); + struct ieee802154_sub_if_data *sdata; + struct ieee802154_pan_device *parent; + __le16 short_addr; + int ret; + + ASSERT_RTNL(); + + sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev); + + if (wpan_dev->parent) { + dev_err(&sdata->dev->dev, + "Device %8phC is already associated\n", &ceaddr); + return -EPERM; + } + + if (coord->mode == IEEE802154_SHORT_ADDRESSING) + return -EINVAL; + + parent = kzalloc(sizeof(*parent), GFP_KERNEL); + if (!parent) + return -ENOMEM; + + parent->pan_id = coord->pan_id; + parent->mode = coord->mode; + parent->extended_addr = coord->extended_addr; + parent->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST); + + /* Set the PAN ID hardware address filter beforehand to avoid dropping + * the association response with a destination PAN ID field set to the + * "new" PAN ID. + */ + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_pan_id(local, coord->pan_id); + if (ret < 0) + goto free_parent; + } + + ret = mac802154_perform_association(sdata, parent, &short_addr); + if (ret) + goto reset_panid; + + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_short_addr(local, short_addr); + if (ret < 0) + goto reset_panid; + } + + wpan_dev->pan_id = coord->pan_id; + wpan_dev->short_addr = short_addr; + wpan_dev->parent = parent; + + return 0; + +reset_panid: + if (local->hw.flags & IEEE802154_HW_AFILT) + drv_set_pan_id(local, cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)); + +free_parent: + kfree(parent); + return ret; +} + +static int mac802154_disassociate_from_parent(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + struct ieee802154_pan_device *child, *tmp; + struct ieee802154_sub_if_data *sdata; + unsigned int max_assoc; + u64 eaddr; + int ret; + + sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev); + + /* Start by disassociating all the children and preventing new ones to + * attempt associations. + */ + max_assoc = cfg802154_set_max_associations(wpan_dev, 0); + list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) { + ret = mac802154_send_disassociation_notif(sdata, child, + IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE); + if (ret) { + eaddr = swab64((__force u64)child->extended_addr); + dev_err(&sdata->dev->dev, + "Disassociation with %8phC may have failed (%d)\n", + &eaddr, ret); + } + + list_del(&child->node); + } + + ret = mac802154_send_disassociation_notif(sdata, wpan_dev->parent, + IEEE802154_DEVICE_WISHES_TO_LEAVE); + if (ret) { + eaddr = swab64((__force u64)wpan_dev->parent->extended_addr); + dev_err(&sdata->dev->dev, + "Disassociation from %8phC may have failed (%d)\n", + &eaddr, ret); + } + + ret = 0; + + kfree(wpan_dev->parent); + wpan_dev->parent = NULL; + wpan_dev->pan_id = cpu_to_le16(IEEE802154_PAN_ID_BROADCAST); + wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST); + + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_pan_id(local, wpan_dev->pan_id); + if (ret < 0) + goto reset_mac_assoc; + + ret = drv_set_short_addr(local, wpan_dev->short_addr); + if (ret < 0) + goto reset_mac_assoc; + } + +reset_mac_assoc: + cfg802154_set_max_associations(wpan_dev, max_assoc); + + return ret; +} + +static int mac802154_disassociate_child(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_pan_device *child) +{ + struct ieee802154_sub_if_data *sdata; + int ret; + + sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev); + + ret = mac802154_send_disassociation_notif(sdata, child, + IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE); + if (ret) + return ret; + + list_del(&child->node); + wpan_dev->nchildren--; + kfree(child); + + return 0; +} + +static int mac802154_disassociate(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + u64 teaddr = swab64((__force u64)target->extended_addr); + struct ieee802154_pan_device *pan_device; + + ASSERT_RTNL(); + + if (cfg802154_device_is_parent(wpan_dev, target)) + return mac802154_disassociate_from_parent(wpan_phy, wpan_dev); + + pan_device = cfg802154_device_is_child(wpan_dev, target); + if (pan_device) + return mac802154_disassociate_child(wpan_phy, wpan_dev, + pan_device); + + dev_err(&wpan_dev->netdev->dev, + "Device %8phC is not associated with us\n", &teaddr); + + return -EINVAL; +} + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static void ieee802154_get_llsec_table(struct wpan_phy *wpan_phy, @@ -526,6 +699,8 @@ const struct cfg802154_ops mac802154_config_ops = { .abort_scan = mac802154_abort_scan, .send_beacons = mac802154_send_beacons, .stop_beacons = mac802154_stop_beacons, + .associate = mac802154_associate, + .disassociate = mac802154_disassociate, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL .get_llsec_table = ieee802154_get_llsec_table, .lock_llsec_table = ieee802154_lock_llsec_table, diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index c347ec9ff8c9..08dd521a51a5 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -24,6 +24,7 @@ enum ieee802154_ongoing { IEEE802154_IS_SCANNING = BIT(0), IEEE802154_IS_BEACONING = BIT(1), + IEEE802154_IS_ASSOCIATING = BIT(2), }; /* mac802154 device private data */ @@ -74,6 +75,13 @@ struct ieee802154_local { struct list_head rx_mac_cmd_list; struct work_struct rx_mac_cmd_work; + /* Association */ + struct ieee802154_pan_device *assoc_dev; + struct completion assoc_done; + __le16 assoc_addr; + u8 assoc_status; + struct work_struct assoc_work; + bool started; bool suspended; unsigned long ongoing; @@ -296,6 +304,25 @@ static inline bool mac802154_is_beaconing(struct ieee802154_local *local) void mac802154_rx_mac_cmd_worker(struct work_struct *work); +int mac802154_perform_association(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *coord, + __le16 *short_addr); +int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); + +static inline bool mac802154_is_associating(struct ieee802154_local *local) +{ + return test_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing); +} + +int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *target, + u8 reason); +int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); +int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); + /* interface handling */ int ieee802154_iface_init(void); void ieee802154_iface_exit(void); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 357ece67432b..9ab7396668d2 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -103,6 +103,8 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker); INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker); + init_completion(&local->assoc_done); + /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; phy->supported.min_maxbe = 3; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index e2434b4fe514..e40a988d6c80 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -93,6 +93,31 @@ void mac802154_rx_mac_cmd_worker(struct work_struct *work) queue_delayed_work(local->mac_wq, &local->beacon_work, 0); break; + + case IEEE802154_CMD_ASSOCIATION_RESP: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC RESP\n"); + if (!mac802154_is_associating(local)) + break; + + mac802154_process_association_resp(mac_pkt->sdata, mac_pkt->skb); + break; + + case IEEE802154_CMD_ASSOCIATION_REQ: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC REQ\n"); + if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD) + break; + + mac802154_process_association_req(mac_pkt->sdata, mac_pkt->skb); + break; + + case IEEE802154_CMD_DISASSOCIATION_NOTIFY: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing DISASSOC NOTIF\n"); + if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD) + break; + + mac802154_process_disassociation_notif(mac_pkt->sdata, mac_pkt->skb); + break; + default: break; } @@ -131,12 +156,15 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: - if (hdr->source.mode != IEEE802154_ADDR_NONE) - /* FIXME: check if we are PAN coordinator */ - skb->pkt_type = PACKET_OTHERHOST; - else + if (hdr->source.mode == IEEE802154_ADDR_NONE) /* ACK comes with both addresses empty */ skb->pkt_type = PACKET_HOST; + else if (!wpan_dev->parent) + /* No dest means PAN coordinator is the recipient */ + skb->pkt_type = PACKET_HOST; + else + /* We are not the PAN coordinator, just relaying */ + skb->pkt_type = PACKET_OTHERHOST; break; case IEEE802154_ADDR_LONG: if (mac_cb(skb)->dest.pan_id != span && diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c index d9658f2c4ae6..1c0eeaa76560 100644 --- a/net/mac802154/scan.c +++ b/net/mac802154/scan.c @@ -466,6 +466,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, struct cfg802154_beacon_request *request) { struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; ASSERT_RTNL(); @@ -495,8 +496,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, local->beacon.mac_pl.superframe_order = request->interval; local->beacon.mac_pl.final_cap_slot = 0xf; local->beacon.mac_pl.battery_life_ext = 0; - /* TODO: Fill this field with the coordinator situation in the network */ - local->beacon.mac_pl.pan_coordinator = 1; + local->beacon.mac_pl.pan_coordinator = !wpan_dev->parent; local->beacon.mac_pl.assoc_permit = 1; if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION) @@ -510,3 +510,406 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, return 0; } + +int mac802154_perform_association(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *coord, + __le16 *short_addr) +{ + u64 ceaddr = swab64((__force u64)coord->extended_addr); + struct ieee802154_association_req_frame frame = {}; + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct sk_buff *skb; + int ret; + + frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + frame.mhr.fc.security_enabled = 0; + frame.mhr.fc.frame_pending = 0; + frame.mhr.fc.ack_request = 1; /* We always expect an ack here */ + frame.mhr.fc.intra_pan = 0; + frame.mhr.fc.dest_addr_mode = (coord->mode == IEEE802154_ADDR_LONG) ? + IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING; + frame.mhr.fc.version = IEEE802154_2003_STD; + frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.source.mode = IEEE802154_ADDR_LONG; + frame.mhr.source.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); + frame.mhr.source.extended_addr = wpan_dev->extended_addr; + frame.mhr.dest.mode = coord->mode; + frame.mhr.dest.pan_id = coord->pan_id; + if (coord->mode == IEEE802154_ADDR_LONG) + frame.mhr.dest.extended_addr = coord->extended_addr; + else + frame.mhr.dest.short_addr = coord->short_addr; + frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF; + frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_REQ; + frame.assoc_req_pl.device_type = 1; + frame.assoc_req_pl.power_source = 1; + frame.assoc_req_pl.rx_on_when_idle = 1; + frame.assoc_req_pl.alloc_addr = 1; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.assoc_req_pl), + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &frame, &frame.assoc_req_pl, + sizeof(frame.assoc_req_pl)); + if (ret) { + kfree_skb(skb); + return ret; + } + + local->assoc_dev = coord; + reinit_completion(&local->assoc_done); + set_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing); + + ret = ieee802154_mlme_tx_one_locked(local, sdata, skb); + if (ret) { + if (ret > 0) + ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO; + dev_warn(&sdata->dev->dev, + "No ASSOC REQ ACK received from %8phC\n", &ceaddr); + goto clear_assoc; + } + + ret = wait_for_completion_killable_timeout(&local->assoc_done, 10 * HZ); + if (ret <= 0) { + dev_warn(&sdata->dev->dev, + "No ASSOC RESP received from %8phC\n", &ceaddr); + ret = -ETIMEDOUT; + goto clear_assoc; + } + + if (local->assoc_status != IEEE802154_ASSOCIATION_SUCCESSFUL) { + if (local->assoc_status == IEEE802154_PAN_AT_CAPACITY) + ret = -ERANGE; + else + ret = -EPERM; + + dev_warn(&sdata->dev->dev, + "Negative ASSOC RESP received from %8phC: %s\n", &ceaddr, + local->assoc_status == IEEE802154_PAN_AT_CAPACITY ? + "PAN at capacity" : "access denied"); + } + + ret = 0; + *short_addr = local->assoc_addr; + +clear_assoc: + clear_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing); + local->assoc_dev = NULL; + + return ret; +} + +int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee802154_addr *src = &mac_cb(skb)->source; + struct ieee802154_addr *dest = &mac_cb(skb)->dest; + u64 deaddr = swab64((__force u64)dest->extended_addr); + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_assoc_resp_pl resp_pl = {}; + + if (skb->len != sizeof(resp_pl)) + return -EINVAL; + + if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING || + dest->mode != IEEE802154_EXTENDED_ADDRESSING)) + return -EINVAL; + + if (unlikely(dest->extended_addr != wpan_dev->extended_addr || + src->extended_addr != local->assoc_dev->extended_addr)) + return -ENODEV; + + memcpy(&resp_pl, skb->data, sizeof(resp_pl)); + local->assoc_addr = resp_pl.short_addr; + local->assoc_status = resp_pl.status; + + dev_dbg(&skb->dev->dev, + "ASSOC RESP 0x%x received from %8phC, getting short address %04x\n", + local->assoc_status, &deaddr, local->assoc_addr); + + complete(&local->assoc_done); + + return 0; +} + +int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *target, + u8 reason) +{ + struct ieee802154_disassociation_notif_frame frame = {}; + u64 teaddr = swab64((__force u64)target->extended_addr); + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct sk_buff *skb; + int ret; + + frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + frame.mhr.fc.security_enabled = 0; + frame.mhr.fc.frame_pending = 0; + frame.mhr.fc.ack_request = 1; + frame.mhr.fc.intra_pan = 1; + frame.mhr.fc.dest_addr_mode = (target->mode == IEEE802154_ADDR_LONG) ? + IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING; + frame.mhr.fc.version = IEEE802154_2003_STD; + frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.source.mode = IEEE802154_ADDR_LONG; + frame.mhr.source.pan_id = wpan_dev->pan_id; + frame.mhr.source.extended_addr = wpan_dev->extended_addr; + frame.mhr.dest.mode = target->mode; + frame.mhr.dest.pan_id = wpan_dev->pan_id; + if (target->mode == IEEE802154_ADDR_LONG) + frame.mhr.dest.extended_addr = target->extended_addr; + else + frame.mhr.dest.short_addr = target->short_addr; + frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF; + frame.mac_pl.cmd_id = IEEE802154_CMD_DISASSOCIATION_NOTIFY; + frame.disassoc_pl = reason; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.disassoc_pl), + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &frame, &frame.disassoc_pl, + sizeof(frame.disassoc_pl)); + if (ret) { + kfree_skb(skb); + return ret; + } + + ret = ieee802154_mlme_tx_one_locked(local, sdata, skb); + if (ret) { + dev_warn(&sdata->dev->dev, + "No DISASSOC ACK received from %8phC\n", &teaddr); + if (ret > 0) + ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO; + return ret; + } + + dev_dbg(&sdata->dev->dev, "DISASSOC ACK received from %8phC\n", &teaddr); + return 0; +} + +static int +mac802154_send_association_resp_locked(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *target, + struct ieee802154_assoc_resp_pl *assoc_resp_pl) +{ + u64 teaddr = swab64((__force u64)target->extended_addr); + struct ieee802154_association_resp_frame frame = {}; + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct sk_buff *skb; + int ret; + + frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + frame.mhr.fc.security_enabled = 0; + frame.mhr.fc.frame_pending = 0; + frame.mhr.fc.ack_request = 1; /* We always expect an ack here */ + frame.mhr.fc.intra_pan = 1; + frame.mhr.fc.dest_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.fc.version = IEEE802154_2003_STD; + frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.source.mode = IEEE802154_ADDR_LONG; + frame.mhr.source.extended_addr = wpan_dev->extended_addr; + frame.mhr.dest.mode = IEEE802154_ADDR_LONG; + frame.mhr.dest.pan_id = wpan_dev->pan_id; + frame.mhr.dest.extended_addr = target->extended_addr; + frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF; + frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_RESP; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(*assoc_resp_pl), + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &frame, assoc_resp_pl, + sizeof(*assoc_resp_pl)); + if (ret) { + kfree_skb(skb); + return ret; + } + + ret = ieee802154_mlme_tx_locked(local, sdata, skb); + if (ret) { + dev_warn(&sdata->dev->dev, + "No ASSOC RESP ACK received from %8phC\n", &teaddr); + if (ret > 0) + ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO; + return ret; + } + + return 0; +} + +int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_addr *src = &mac_cb(skb)->source; + struct ieee802154_addr *dest = &mac_cb(skb)->dest; + struct ieee802154_assoc_resp_pl assoc_resp_pl = {}; + struct ieee802154_assoc_req_pl assoc_req_pl; + struct ieee802154_pan_device *child, *exchild; + struct ieee802154_addr tmp = {}; + u64 ceaddr; + int ret; + + if (skb->len != sizeof(assoc_req_pl)) + return -EINVAL; + + if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING)) + return -EINVAL; + + if (unlikely(dest->pan_id != wpan_dev->pan_id)) + return -ENODEV; + + if (dest->mode == IEEE802154_EXTENDED_ADDRESSING && + unlikely(dest->extended_addr != wpan_dev->extended_addr)) + return -ENODEV; + else if (dest->mode == IEEE802154_SHORT_ADDRESSING && + unlikely(dest->short_addr != wpan_dev->short_addr)) + return -ENODEV; + + if (wpan_dev->parent) { + dev_dbg(&sdata->dev->dev, + "Ignoring ASSOC REQ, not the PAN coordinator\n"); + return -ENODEV; + } + + mutex_lock(&wpan_dev->association_lock); + + memcpy(&assoc_req_pl, skb->data, sizeof(assoc_req_pl)); + if (assoc_req_pl.assoc_type) { + dev_err(&skb->dev->dev, "Fast associations not supported yet\n"); + ret = -EOPNOTSUPP; + goto unlock; + } + + child = kzalloc(sizeof(*child), GFP_KERNEL); + if (!child) { + ret = -ENOMEM; + goto unlock; + } + + child->extended_addr = src->extended_addr; + child->mode = IEEE802154_EXTENDED_ADDRESSING; + ceaddr = swab64((__force u64)child->extended_addr); + + if (wpan_dev->nchildren >= wpan_dev->max_associations) { + if (!wpan_dev->max_associations) + assoc_resp_pl.status = IEEE802154_PAN_ACCESS_DENIED; + else + assoc_resp_pl.status = IEEE802154_PAN_AT_CAPACITY; + assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST); + dev_dbg(&sdata->dev->dev, + "Refusing ASSOC REQ from child %8phC, %s\n", &ceaddr, + assoc_resp_pl.status == IEEE802154_PAN_ACCESS_DENIED ? + "access denied" : "too many children"); + } else { + assoc_resp_pl.status = IEEE802154_ASSOCIATION_SUCCESSFUL; + if (assoc_req_pl.alloc_addr) { + assoc_resp_pl.short_addr = cfg802154_get_free_short_addr(wpan_dev); + child->mode = IEEE802154_SHORT_ADDRESSING; + } else { + assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); + } + child->short_addr = assoc_resp_pl.short_addr; + dev_dbg(&sdata->dev->dev, + "Accepting ASSOC REQ from child %8phC, providing short address 0x%04x\n", + &ceaddr, le16_to_cpu(child->short_addr)); + } + + ret = mac802154_send_association_resp_locked(sdata, child, &assoc_resp_pl); + if (ret || assoc_resp_pl.status != IEEE802154_ASSOCIATION_SUCCESSFUL) { + kfree(child); + goto unlock; + } + + dev_dbg(&sdata->dev->dev, + "Successful association with new child %8phC\n", &ceaddr); + + /* Ensure this child is not already associated (might happen due to + * retransmissions), in this case drop the ex structure. + */ + tmp.mode = child->mode; + tmp.extended_addr = child->extended_addr; + exchild = cfg802154_device_is_child(wpan_dev, &tmp); + if (exchild) { + dev_dbg(&sdata->dev->dev, + "Child %8phC was already known\n", &ceaddr); + list_del(&exchild->node); + } + + list_add(&child->node, &wpan_dev->children); + wpan_dev->nchildren++; + +unlock: + mutex_unlock(&wpan_dev->association_lock); + return ret; +} + +int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee802154_addr *src = &mac_cb(skb)->source; + struct ieee802154_addr *dest = &mac_cb(skb)->dest; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_pan_device *child; + struct ieee802154_addr target; + bool parent; + u64 teaddr; + + if (skb->len != sizeof(u8)) + return -EINVAL; + + if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING)) + return -EINVAL; + + if (dest->mode == IEEE802154_EXTENDED_ADDRESSING && + unlikely(dest->extended_addr != wpan_dev->extended_addr)) + return -ENODEV; + else if (dest->mode == IEEE802154_SHORT_ADDRESSING && + unlikely(dest->short_addr != wpan_dev->short_addr)) + return -ENODEV; + + if (dest->pan_id != wpan_dev->pan_id) + return -ENODEV; + + target.mode = IEEE802154_EXTENDED_ADDRESSING; + target.extended_addr = src->extended_addr; + teaddr = swab64((__force u64)target.extended_addr); + dev_dbg(&skb->dev->dev, "Processing DISASSOC NOTIF from %8phC\n", &teaddr); + + mutex_lock(&wpan_dev->association_lock); + parent = cfg802154_device_is_parent(wpan_dev, &target); + if (!parent) + child = cfg802154_device_is_child(wpan_dev, &target); + if (!parent && !child) { + mutex_unlock(&wpan_dev->association_lock); + return -EINVAL; + } + + if (parent) { + kfree(wpan_dev->parent); + wpan_dev->parent = NULL; + } else { + list_del(&child->node); + kfree(child); + wpan_dev->nchildren--; + } + + mutex_unlock(&wpan_dev->association_lock); + + return 0; +} diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c index 017248dea038..220414e5c850 100644 --- a/net/mptcp/crypto_test.c +++ b/net/mptcp/crypto_test.c @@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = { kunit_test_suite(mptcp_crypto_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto"); diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index a0990c365a2e..c30405e76833 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), + SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index cae71d947252..dd7fd1f246b5 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -65,6 +65,7 @@ enum linux_mptcp_mib_field { * conflict with another subflow while updating msk rcv wnd */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ + MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ __MPTCP_MIB_MAX }; @@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net, __SNMP_INC_STATS(net->mib.mptcp_statistics, field); } +static inline void MPTCP_DEC_STATS(struct net *net, + enum linux_mptcp_mib_field field) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_DEC_STATS(net->mib.mptcp_statistics, field); +} + bool mptcp_mib_alloc(struct net *net); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index bf4d96f6f99a..287a60381eae 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, that will not bring + * anything more than only calling inet_sk_state_store(), because the + * old status is known (TCP_CLOSE). + */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); @@ -1100,7 +1105,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, - .flags = GENL_UNS_ADMIN_PERM, + .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bc81ea53a049..3ed4709a7509 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) return READ_ONCE(msk->wnd_end); } -static bool mptcp_is_tcpsk(struct sock *sk) +static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { - struct socket *sock = sk->sk_socket; - - if (unlikely(sk->sk_prot == &tcp_prot)) { - /* we are being invoked after mptcp_accept() has - * accepted a non-mp-capable flow: sk is a tcp_sk, - * not an mptcp one. - * - * Hand the socket over to tcp so all further socket ops - * bypass mptcp. - */ - WRITE_ONCE(sock->ops, &inet_stream_ops); - return true; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { - WRITE_ONCE(sock->ops, &inet6_stream_ops); - return true; + if (sk->sk_prot == &tcpv6_prot) + return &inet6_stream_ops; #endif - } - - return false; + WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + return &inet_stream_ops; } static int __mptcp_socket_create(struct mptcp_sock *msk) @@ -443,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk) switch (sk->sk_state) { case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_FIN_WAIT2); + mptcp_set_state(sk, TCP_FIN_WAIT2); break; case TCP_CLOSING: case TCP_LAST_ACK: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; } @@ -608,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); + mptcp_set_state(sk, TCP_CLOSE_WAIT); break; case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); + mptcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; default: /* Other states not expected */ @@ -789,7 +775,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) */ ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); + mptcp_set_state(sk, ssk_state); WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ @@ -2477,7 +2463,7 @@ out: inet_sk_state_load(msk->first) == TCP_CLOSE) { if (sk->sk_state != TCP_ESTABLISHED || msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_close_wake_up(sk); } else { mptcp_start_tout_timer(sk); @@ -2572,7 +2558,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) WRITE_ONCE(sk->sk_err, ECONNRESET); } - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); @@ -2707,7 +2693,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2885,6 +2871,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) release_sock(ssk); } +void mptcp_set_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + break; + + default: + if (oldstate == TCP_ESTABLISHED) + MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + } + + inet_sk_state_store(sk, state); +} + static const unsigned char new_state[16] = { /* current state: new state: action: */ [0 /* (Invalid) */] = TCP_CLOSE, @@ -2907,7 +2911,7 @@ static int mptcp_close_state(struct sock *sk) int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; - inet_sk_state_store(sk, ns); + mptcp_set_state(sk, ns); return next & TCP_ACTION_FIN; } @@ -3018,7 +3022,7 @@ bool __mptcp_close(struct sock *sk, long timeout) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); goto cleanup; } @@ -3061,7 +3065,7 @@ cleanup: * state, let's not keep resources busy for no reasons */ if (subflows_alive == 0) - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); @@ -3127,7 +3131,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) return -EBUSY; mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); @@ -3215,7 +3219,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ - inet_sk_state_store(nsk, TCP_ESTABLISHED); + mptcp_set_state(nsk, TCP_ESTABLISHED); /* The msk maintain a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); @@ -3258,44 +3262,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } -static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, - bool kern) -{ - struct sock *newsk; - - pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); - newsk = inet_csk_accept(ssk, flags, err, kern); - if (!newsk) - return NULL; - - pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); - if (sk_is_mptcp(newsk)) { - struct mptcp_subflow_context *subflow; - struct sock *new_mptcp_sock; - - subflow = mptcp_subflow_ctx(newsk); - new_mptcp_sock = subflow->conn; - - /* is_mptcp should be false if subflow->conn is missing, see - * subflow_syn_recv_sock() - */ - if (WARN_ON_ONCE(!new_mptcp_sock)) { - tcp_sk(newsk)->is_mptcp = 0; - goto out; - } - - newsk = new_mptcp_sock; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); - } - -out: - newsk->sk_kern_sock = kern; - return newsk; -} - void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) { struct mptcp_subflow_context *subflow, *tmp; @@ -3402,12 +3368,12 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before taking actions + /* be sure to sync the msk state before taking actions * depending on sk_state (MPTCP_ERROR_REPORT) * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) - __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first) + __mptcp_sync_state(sk, msk->pending_state); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) @@ -3674,7 +3640,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(ssk)) return PTR_ERR(ssk); - inet_sk_state_store(sk, TCP_SYN_SENT); + mptcp_set_state(sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -3724,7 +3690,7 @@ out: if (unlikely(err)) { /* avoid leaving a dangling token in an unconnected socket */ mptcp_token_destroy(msk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); return err; } @@ -3739,7 +3705,6 @@ static struct proto mptcp_prot = { .connect = mptcp_connect, .disconnect = mptcp_disconnect, .close = mptcp_close, - .accept = mptcp_accept, .setsockopt = mptcp_setsockopt, .getsockopt = mptcp_getsockopt, .shutdown = mptcp_shutdown, @@ -3814,13 +3779,13 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } - inet_sk_state_store(sk, TCP_LISTEN); + mptcp_set_state(sk, TCP_LISTEN); sock_set_flag(sk, SOCK_RCU_FREE); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); release_sock(ssk); - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + mptcp_set_state(sk, inet_sk_state_load(ssk)); if (!err) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3849,18 +3814,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk) return -EINVAL; - newsk = mptcp_accept(ssk, flags, &err, kern); + pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); + newsk = inet_csk_accept(ssk, flags, &err, kern); if (!newsk) return err; - lock_sock(newsk); - - __inet_accept(sock, newsock, newsk); - if (!mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsk); + pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); + if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; + struct sock *new_mptcp_sock; + + subflow = mptcp_subflow_ctx(newsk); + new_mptcp_sock = subflow->conn; + + /* is_mptcp should be false if subflow->conn is missing, see + * subflow_syn_recv_sock() + */ + if (WARN_ON_ONCE(!new_mptcp_sock)) { + tcp_sk(newsk)->is_mptcp = 0; + goto tcpfallback; + } + + newsk = new_mptcp_sock; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + + newsk->sk_kern_sock = kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk = mptcp_sk(newsk); msk->in_accept_queue = 0; /* set ssk->sk_socket of accept()ed flows to mptcp socket. @@ -3880,8 +3863,23 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, __mptcp_close_ssk(newsk, msk->first, mptcp_subflow_ctx(msk->first), 0); if (unlikely(list_is_singular(&msk->conn_list))) - inet_sk_state_store(newsk, TCP_CLOSE); + mptcp_set_state(newsk, TCP_CLOSE); } + } else { + MPTCP_INC_STATS(sock_net(ssk), + MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); +tcpfallback: + newsk->sk_kern_sock = kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); + /* we are being invoked after accepting a non-mp-capable + * flow: sk is a tcp_sk, not an mptcp one. + * + * Hand the socket over to tcp so all further socket ops + * bypass mptcp. + */ + WRITE_ONCE(newsock->sk->sk_socket->ops, + mptcp_fallback_tcp_ops(newsock->sk)); } release_sock(newsk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 458a2d7bb0dd..3517f2d24a22 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,7 +124,7 @@ #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 -#define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { @@ -296,6 +296,9 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 pending_state; /* A subflow asked to set this sk_state, + * protected by the msk data lock + */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -638,6 +641,7 @@ bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); +void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); @@ -728,7 +732,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); -void __mptcp_set_connected(struct sock *sk); +void __mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) @@ -1124,7 +1128,7 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return sk->sk_state == TCP_ESTABLISHED && + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index cabe856b2a45..c40f1428e602 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname) /* should work fine */ case IP_FREEBIND: case IP_TRANSPARENT: + case IP_BIND_ADDRESS_NO_PORT: + case IP_LOCAL_PORT_RANGE: /* the following are control cmsg related */ case IP_PKTINFO: @@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname) /* common stuff that need some love */ case IP_TOS: case IP_TTL: - case IP_BIND_ADDRESS_NO_PORT: case IP_MTU_DISCOVER: case IP_RECVERR: @@ -683,8 +684,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op return 0; } -static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) +static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; @@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); break; + case IP_BIND_ADDRESS_NO_PORT: + inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, + inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + break; + case IP_LOCAL_PORT_RANGE: + WRITE_ONCE(inet_sk(ssk)->local_port_range, + READ_ONCE(inet_sk(sk)->local_port_range)); + break; default: release_sock(sk); WARN_ON_ONCE(1); @@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_FREEBIND: case IP_TRANSPARENT: - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); + case IP_BIND_ADDRESS_NO_PORT: + case IP_LOCAL_PORT_RANGE: + return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); case IP_TOS: return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); } @@ -1350,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); + case IP_BIND_ADDRESS_NO_PORT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + case IP_LOCAL_PORT_RANGE: + return mptcp_put_int_option(msk, optval, optlen, + READ_ONCE(inet_sk(sk)->local_port_range)); } return -EOPNOTSUPP; @@ -1450,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); + inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a4f3c27f0309..1ef28642afc4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -419,22 +419,28 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } -void __mptcp_set_connected(struct sock *sk) +void __mptcp_sync_state(struct sock *sk, int state) { - __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, TCP_ESTABLISHED); + mptcp_set_state(sk, state); sk->sk_state_change(sk); } } -static void mptcp_set_connected(struct sock *sk) +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) { + struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) - __mptcp_set_connected(sk); - else - __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } mptcp_data_unlock(sk); } @@ -496,7 +502,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -540,7 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(msk, sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } return; @@ -1740,7 +1746,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } /* as recvmsg() does not acquire the subflow socket for ssk selection diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index 0758865ab658..bfff53e668da 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = { kunit_test_suite(mptcp_token_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Token"); diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 5257d5e7eb09..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, static const struct bpf_func_proto * bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } const struct bpf_verifier_ops netfilter_verifier_ops = { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fb0ae15e96df..0c22a02c2035 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -992,13 +992,13 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) if (err) goto err_filter; - if (!cda[CTA_FILTER]) - return filter; - err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); if (err < 0) goto err_filter; + if (!cda[CTA_FILTER]) + return filter; + err = ctnetlink_parse_filter(cda[CTA_FILTER], filter); if (err < 0) goto err_filter; @@ -1043,7 +1043,7 @@ err_filter: static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) { - return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS]; + return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS] || cda[CTA_ZONE]; } static int ctnetlink_start(struct netlink_callback *cb) @@ -1148,6 +1148,10 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) if (filter->family && nf_ct_l3num(ct) != filter->family) goto ignore_entry; + if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID && + !nf_ct_zone_equal_any(ct, &filter->zone)) + goto ignore_entry; + if (filter->orig_flags) { tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL); if (!ctnetlink_filter_match_tuple(&filter->orig, tuple, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c5c17c6e80ed..5531b13d92b6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2261,7 +2261,16 @@ static int nft_chain_parse_hook(struct net *net, return -EOPNOTSUPP; } - type = basechain->type; + if (nla[NFTA_CHAIN_TYPE]) { + type = __nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], + family); + if (!type) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); + return -ENOENT; + } + } else { + type = basechain->type; + } } if (!try_module_get(type->owner)) { @@ -5817,10 +5826,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); nlmsg_end(skb, nlh); - if (dump_ctx->reset && args.iter.count > args.iter.skip) - audit_log_nft_set_reset(table, cb->seq, - args.iter.count - args.iter.skip); - rcu_read_unlock(); if (args.iter.err && args.iter.err != -EMSGSIZE) @@ -5836,6 +5841,26 @@ nla_put_failure: return -ENOSPC; } +static int nf_tables_dumpreset_set(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + struct nft_set_dump_ctx *dump_ctx = cb->data; + int ret, skip = cb->args[0]; + + mutex_lock(&nft_net->commit_mutex); + + ret = nf_tables_dump_set(skb, cb); + + if (cb->args[0] > skip) + audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, + cb->args[0] - skip); + + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_set_start(struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; @@ -5910,7 +5935,7 @@ static int nft_setelem_parse_flags(const struct nft_set *set, return 0; } -static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, +static int nft_setelem_parse_key(struct nft_ctx *ctx, const struct nft_set *set, struct nft_data *key, struct nlattr *attr) { struct nft_data_desc desc = { @@ -5963,7 +5988,7 @@ static void *nft_setelem_catchall_get(const struct net *net, return priv; } -static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set, +static int nft_setelem_get(struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_elem *elem, u32 flags) { void *priv; @@ -5982,7 +6007,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set, return 0; } -static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, +static int nft_get_set_elem(struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr, bool reset) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -6039,21 +6064,18 @@ err_fill_setelem: return err; } -/* called with rcu_read_lock held */ -static int nf_tables_getsetelem(struct sk_buff *skb, - const struct nfnl_info *info, - const struct nlattr * const nla[]) +static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx, + const struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[], + bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; - int rem, err = 0, nelems = 0; struct net *net = info->net; struct nft_table *table; struct nft_set *set; - struct nlattr *attr; - struct nft_ctx ctx; - bool reset = false; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, genmask, 0); @@ -6068,10 +6090,22 @@ static int nf_tables_getsetelem(struct sk_buff *skb, return PTR_ERR(set); } - nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + nft_ctx_init(&dump_ctx->ctx, net, skb, + info->nlh, family, table, NULL, nla); + dump_ctx->set = set; + dump_ctx->reset = reset; + return 0; +} - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; +/* called with rcu_read_lock held */ +static int nf_tables_getsetelem(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct netlink_ext_ack *extack = info->extack; + struct nft_set_dump_ctx dump_ctx; + struct nlattr *attr; + int rem, err = 0; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -6080,12 +6114,55 @@ static int nf_tables_getsetelem(struct sk_buff *skb, .done = nf_tables_dump_set_done, .module = THIS_MODULE, }; - struct nft_set_dump_ctx dump_ctx = { - .set = set, - .ctx = ctx, - .reset = reset, + + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); + if (err) + return err; + + c.data = &dump_ctx; + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) + return -EINVAL; + + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); + if (err) + return err; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false); + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); + break; + } + } + + return err; +} + +static int nf_tables_getsetelem_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + struct netlink_ext_ack *extack = info->extack; + struct nft_set_dump_ctx dump_ctx; + int rem, err = 0, nelems = 0; + struct nlattr *attr; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_set_start, + .dump = nf_tables_dumpreset_set, + .done = nf_tables_dump_set_done, + .module = THIS_MODULE, }; + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); + if (err) + return err; + c.data = &dump_ctx; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } @@ -6093,18 +6170,31 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + rcu_read_lock(); + + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); + if (err) + goto out_unlock; + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&ctx, set, attr, reset); + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; } nelems++; } + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems); - if (reset) - audit_log_nft_set_reset(table, nft_pernet(net)->base_seq, - nelems); +out_unlock: + rcu_read_unlock(); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); return err; } @@ -9078,7 +9168,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM_RESET] = { - .call = nf_tables_getsetelem, + .call = nf_tables_getsetelem_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, @@ -10383,6 +10473,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7252fcdae349..efd523496be4 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1597,7 +1597,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) if (nft_set_elem_expired(&e->ext)) { priv->dirty = true; - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 177126fb0484..4ed8ffd58ff3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1519,8 +1519,7 @@ out: int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, - struct sk_buff *skb, void *data), + netlink_filter_fn filter, void *filter_data) { struct net *net = sock_net(ssk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 9c7ffd10df2a..8c7af02f8454 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -631,6 +631,138 @@ static int genl_validate_ops(const struct genl_family *family) return 0; } +static void *genl_sk_priv_alloc(struct genl_family *family) +{ + void *priv; + + priv = kzalloc(family->sock_priv_size, GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + + if (family->sock_priv_init) + family->sock_priv_init(priv); + + return priv; +} + +static void genl_sk_priv_free(const struct genl_family *family, void *priv) +{ + if (family->sock_priv_destroy) + family->sock_priv_destroy(priv); + kfree(priv); +} + +static int genl_sk_privs_alloc(struct genl_family *family) +{ + if (!family->sock_priv_size) + return 0; + + family->sock_privs = kzalloc(sizeof(*family->sock_privs), GFP_KERNEL); + if (!family->sock_privs) + return -ENOMEM; + xa_init(family->sock_privs); + return 0; +} + +static void genl_sk_privs_free(const struct genl_family *family) +{ + unsigned long id; + void *priv; + + if (!family->sock_priv_size) + return; + + xa_for_each(family->sock_privs, id, priv) + genl_sk_priv_free(family, priv); + + xa_destroy(family->sock_privs); + kfree(family->sock_privs); +} + +static void genl_sk_priv_free_by_sock(struct genl_family *family, + struct sock *sk) +{ + void *priv; + + if (!family->sock_priv_size) + return; + priv = xa_erase(family->sock_privs, (unsigned long) sk); + if (!priv) + return; + genl_sk_priv_free(family, priv); +} + +static void genl_release(struct sock *sk, unsigned long *groups) +{ + struct genl_family *family; + unsigned int id; + + down_read(&cb_lock); + + idr_for_each_entry(&genl_fam_idr, family, id) + genl_sk_priv_free_by_sock(family, sk); + + up_read(&cb_lock); +} + +/** + * __genl_sk_priv_get - Get family private pointer for socket, if exists + * + * @family: family + * @sk: socket + * + * Lookup a private memory for a Generic netlink family and specified socket. + * + * Caller should make sure this is called in RCU read locked section. + * + * Return: valid pointer on success, otherwise negative error value + * encoded by ERR_PTR(), NULL in case priv does not exist. + */ +void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk) +{ + if (WARN_ON_ONCE(!family->sock_privs)) + return ERR_PTR(-EINVAL); + return xa_load(family->sock_privs, (unsigned long) sk); +} + +/** + * genl_sk_priv_get - Get family private pointer for socket + * + * @family: family + * @sk: socket + * + * Lookup a private memory for a Generic netlink family and specified socket. + * Allocate the private memory in case it was not already done. + * + * Return: valid pointer on success, otherwise negative error value + * encoded by ERR_PTR(). + */ +void *genl_sk_priv_get(struct genl_family *family, struct sock *sk) +{ + void *priv, *old_priv; + + priv = __genl_sk_priv_get(family, sk); + if (priv) + return priv; + + /* priv for the family does not exist so far, create it. */ + + priv = genl_sk_priv_alloc(family); + if (IS_ERR(priv)) + return ERR_CAST(priv); + + old_priv = xa_cmpxchg(family->sock_privs, (unsigned long) sk, NULL, + priv, GFP_KERNEL); + if (old_priv) { + genl_sk_priv_free(family, priv); + if (xa_is_err(old_priv)) + return ERR_PTR(xa_err(old_priv)); + /* Race happened, priv for the socket was already inserted. */ + return old_priv; + } + return priv; +} + /** * genl_register_family - register a generic netlink family * @family: generic netlink family @@ -659,6 +791,10 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } + err = genl_sk_privs_alloc(family); + if (err) + goto errout_locked; + /* * Sadly, a few cases need to be special-cased * due to them having previously abused the API @@ -679,7 +815,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_sk_privs_free; } err = genl_validate_assign_mc_groups(family); @@ -698,6 +834,8 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_sk_privs_free: + genl_sk_privs_free(family); errout_locked: genl_unlock_all(); return err; @@ -728,6 +866,9 @@ int genl_unregister_family(const struct genl_family *family) up_write(&cb_lock); wait_event(genl_sk_destructing_waitq, atomic_read(&genl_sk_destructing_cnt) == 0); + + genl_sk_privs_free(family); + genl_unlock(); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -1688,10 +1829,10 @@ static int genl_bind(struct net *net, int group) continue; grp = &family->mcgrps[i]; - if ((grp->flags & GENL_UNS_ADMIN_PERM) && + if ((grp->flags & GENL_MCAST_CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; - if (grp->cap_sys_admin && + if ((grp->flags & GENL_MCAST_CAP_SYS_ADMIN) && !ns_capable(net->user_ns, CAP_SYS_ADMIN)) ret = -EPERM; @@ -1708,6 +1849,7 @@ static int __net_init genl_pernet_init(struct net *net) .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, .bind = genl_bind, + .release = genl_release, }; /* we'll bump the group number right afterwards */ diff --git a/net/psample/psample.c b/net/psample/psample.c index c34e902855db..ddd211a151d0 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -32,7 +32,7 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, - .flags = GENL_UNS_ADMIN_PERM }, + .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; static struct genl_family psample_nl_family __ro_after_init; diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 5a81505fba9a..4e32d659524e 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -126,6 +126,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } + ret = gpiod_direction_output(rfkill->reset_gpio, true); + if (ret) + return ret; + + ret = gpiod_direction_output(rfkill->shutdown_gpio, true); + if (ret) + return ret; + rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ecb91ad4ce63..ef81d019b20f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -182,21 +182,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) */ static void rose_kill_by_device(struct net_device *dev) { - struct sock *s; + struct sock *sk, *array[16]; + struct rose_sock *rose; + bool rescan; + int i, cnt; +start: + rescan = false; + cnt = 0; spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); + sk_for_each(sk, &rose_list) { + rose = rose_sk(sk); + if (rose->device == dev) { + if (cnt == ARRAY_SIZE(array)) { + rescan = true; + break; + } + sock_hold(sk); + array[cnt++] = sk; + } + } + spin_unlock_bh(&rose_list_lock); + for (i = 0; i < cnt; i++) { + sk = array[cnt]; + rose = rose_sk(sk); + lock_sock(sk); + spin_lock_bh(&rose_list_lock); if (rose->device == dev) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); + rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } + spin_unlock_bh(&rose_list_lock); + release_sock(sk); + sock_put(sk); + cond_resched(); } - spin_unlock_bh(&rose_list_lock); + if (rescan) + goto start; } /* @@ -656,7 +682,10 @@ static int rose_release(struct socket *sock) break; } + spin_lock_bh(&rose_list_lock); netdev_put(rose->device, &rose->dev_tracker); + rose->device = NULL; + spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); diff --git a/net/sched/Makefile b/net/sched/Makefile index b5fd49641d91..82c3f78ca486 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o -obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3a7770eff52d..ef70d4771811 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1119,7 +1119,8 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); @@ -1312,7 +1313,7 @@ void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; - if (init_res[i] == 0) /* Bound */ + if (init_res[i] == ACT_P_BOUND) continue; idrinfo = a->idrinfo; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b0455fda7d0b..6cfee6658103 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -318,7 +318,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } else if (ret > 0) { /* Don't override defaults. */ if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 0d7aee8933c5..f8762756657d 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -146,7 +146,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } else if (ret > 0) { ci = to_connmark(*a); if (bind) { - err = 0; + err = ACT_P_BOUND; goto out_free; } if (!(flags & TCA_ACT_FLAGS_REPLACE)) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 8ed285023a40..7f8b1f2f2ed9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -77,8 +77,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else if (err > 0) { - if (bind)/* dont override defaults */ - return 0; + if (bind) /* dont override defaults */ + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f69c47945175..c3e004b5b820 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1349,7 +1349,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, res = ACT_P_CREATED; } else { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 4d15b6a6169c..e620f9a84afe 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* don't override defaults */ - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 904ab3d457ef..4af3b7ec249f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -108,7 +108,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind)/* dont override defaults */ - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 393b78729216..c681cd011afd 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -356,7 +356,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return err; if (err && bind) - return 0; + return ACT_P_BOUND; if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index bc7611b0744c..0e867d13beb5 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -548,7 +548,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, exists = err; if (exists && bind) { kfree(p); - return 0; + return ACT_P_BOUND; } if (!exists) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c deleted file mode 100644 index 598d6e299152..000000000000 --- a/net/sched/act_ipt.c +++ /dev/null @@ -1,464 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/act_ipt.c iptables target interface - * - *TODO: Add other tables. For now we only support the ipv4 table targets - * - * Copyright: Jamal Hadi Salim (2002-13) - */ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <linux/tc_act/tc_ipt.h> -#include <net/tc_act/tc_ipt.h> -#include <net/tc_wrapper.h> -#include <net/ip.h> - -#include <linux/netfilter_ipv4/ip_tables.h> - - -static struct tc_action_ops act_ipt_ops; -static struct tc_action_ops act_xt_ops; - -static int ipt_init_target(struct net *net, struct xt_entry_target *t, - char *table, unsigned int hook) -{ - struct xt_tgchk_param par; - struct xt_target *target; - struct ipt_entry e = {}; - int ret = 0; - - target = xt_request_find_target(AF_INET, t->u.user.name, - t->u.user.revision); - if (IS_ERR(target)) - return PTR_ERR(target); - - t->u.kernel.target = target; - memset(&par, 0, sizeof(par)); - par.net = net; - par.table = table; - par.entryinfo = &e; - par.target = target; - par.targinfo = t->data; - par.hook_mask = 1 << hook; - par.family = NFPROTO_IPV4; - - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); - if (ret < 0) { - module_put(t->u.kernel.target->me); - return ret; - } - return 0; -} - -static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) -{ - struct xt_tgdtor_param par = { - .target = t->u.kernel.target, - .targinfo = t->data, - .family = NFPROTO_IPV4, - .net = net, - }; - if (par.target->destroy != NULL) - par.target->destroy(&par); - module_put(par.target->me); -} - -static void tcf_ipt_release(struct tc_action *a) -{ - struct tcf_ipt *ipt = to_ipt(a); - - if (ipt->tcfi_t) { - ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); - kfree(ipt->tcfi_t); - } - kfree(ipt->tcfi_tname); -} - -static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { - [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, - [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING, - NF_INET_NUMHOOKS), - [TCA_IPT_INDEX] = { .type = NLA_U32 }, - [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, -}; - -static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, - struct tcf_proto *tp, u32 flags) -{ - struct tc_action_net *tn = net_generic(net, id); - bool bind = flags & TCA_ACT_FLAGS_BIND; - struct nlattr *tb[TCA_IPT_MAX + 1]; - struct tcf_ipt *ipt; - struct xt_entry_target *td, *t; - char *tname; - bool exists = false; - int ret = 0, err; - u32 hook = 0; - u32 index = 0; - - if (nla == NULL) - return -EINVAL; - - err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, - NULL); - if (err < 0) - return err; - - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - - err = tcf_idr_check_alloc(tn, &index, a, bind); - if (err < 0) - return err; - exists = err; - if (exists && bind) - return 0; - - if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - if (!exists) { - ret = tcf_idr_create(tn, index, est, a, ops, bind, - false, flags); - if (ret) { - tcf_idr_cleanup(tn, index); - return ret; - } - ret = ACT_P_CREATED; - } else { - if (bind)/* dont override defaults */ - return 0; - - if (!(flags & TCA_ACT_FLAGS_REPLACE)) { - tcf_idr_release(*a, bind); - return -EEXIST; - } - } - - err = -EINVAL; - hook = nla_get_u32(tb[TCA_IPT_HOOK]); - switch (hook) { - case NF_INET_PRE_ROUTING: - break; - case NF_INET_POST_ROUTING: - break; - default: - goto err1; - } - - if (tb[TCA_IPT_TABLE]) { - /* mangle only for now */ - if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle")) - goto err1; - } - - tname = kstrdup("mangle", GFP_KERNEL); - if (unlikely(!tname)) - goto err1; - - t = kmemdup(td, td->u.target_size, GFP_KERNEL); - if (unlikely(!t)) - goto err2; - - err = ipt_init_target(net, t, tname, hook); - if (err < 0) - goto err3; - - ipt = to_ipt(*a); - - spin_lock_bh(&ipt->tcf_lock); - if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t, net); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); - } - ipt->tcfi_tname = tname; - ipt->tcfi_t = t; - ipt->tcfi_hook = hook; - spin_unlock_bh(&ipt->tcf_lock); - return ret; - -err3: - kfree(t); -err2: - kfree(tname); -err1: - tcf_idr_release(*a, bind); - return err; -} - -static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, - a, &act_ipt_ops, tp, flags); -} - -static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, - a, &act_xt_ops, tp, flags); -} - -static bool tcf_ipt_act_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - unsigned int nhoff, len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return false; - - nhoff = skb_network_offset(skb); - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return false; - - len = skb_ip_totlen(skb); - if (skb->len < nhoff + len || len < (iph->ihl * 4u)) - return false; - - return pskb_may_pull(skb, iph->ihl * 4u); -} - -TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) -{ - char saved_cb[sizeof_field(struct sk_buff, cb)]; - int ret = 0, result = 0; - struct tcf_ipt *ipt = to_ipt(a); - struct xt_action_param par; - struct nf_hook_state state = { - .net = dev_net(skb->dev), - .in = skb->dev, - .hook = ipt->tcfi_hook, - .pf = NFPROTO_IPV4, - }; - - if (skb_protocol(skb, false) != htons(ETH_P_IP)) - return TC_ACT_UNSPEC; - - if (skb_unclone(skb, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - - if (!tcf_ipt_act_check(skb)) - return TC_ACT_UNSPEC; - - if (state.hook == NF_INET_POST_ROUTING) { - if (!skb_dst(skb)) - return TC_ACT_UNSPEC; - - state.out = skb->dev; - } - - memcpy(saved_cb, skb->cb, sizeof(saved_cb)); - - spin_lock(&ipt->tcf_lock); - - tcf_lastuse_update(&ipt->tcf_tm); - bstats_update(&ipt->tcf_bstats, skb); - - /* yes, we have to worry about both in and out dev - * worry later - danger - this API seems to have changed - * from earlier kernels - */ - par.state = &state; - par.target = ipt->tcfi_t->u.kernel.target; - par.targinfo = ipt->tcfi_t->data; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - - ret = par.target->target(skb, &par); - - switch (ret) { - case NF_ACCEPT: - result = TC_ACT_OK; - break; - case NF_DROP: - result = TC_ACT_SHOT; - ipt->tcf_qstats.drops++; - break; - case XT_CONTINUE: - result = TC_ACT_PIPE; - break; - default: - net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n", - ret); - result = TC_ACT_OK; - break; - } - spin_unlock(&ipt->tcf_lock); - - memcpy(skb->cb, saved_cb, sizeof(skb->cb)); - - return result; - -} - -static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, - int ref) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tcf_ipt *ipt = to_ipt(a); - struct xt_entry_target *t; - struct tcf_t tm; - struct tc_cnt c; - - /* for simple targets kernel size == user size - * user name = target name - * for foolproof you need to not assume this - */ - - spin_lock_bh(&ipt->tcf_lock); - t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); - if (unlikely(!t)) - goto nla_put_failure; - - c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind; - c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref; - strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); - - if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) || - nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) || - nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) || - nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) || - nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname)) - goto nla_put_failure; - - tcf_tm_dump(&tm, &ipt->tcf_tm); - if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) - goto nla_put_failure; - - spin_unlock_bh(&ipt->tcf_lock); - kfree(t); - return skb->len; - -nla_put_failure: - spin_unlock_bh(&ipt->tcf_lock); - nlmsg_trim(skb, b); - kfree(t); - return -1; -} - -static struct tc_action_ops act_ipt_ops = { - .kind = "ipt", - .id = TCA_ID_IPT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_ipt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int ipt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); - - return tc_action_net_init(net, tn, &act_ipt_ops); -} - -static void __net_exit ipt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_ipt_ops.net_id); -} - -static struct pernet_operations ipt_net_ops = { - .init = ipt_init_net, - .exit_batch = ipt_exit_net, - .id = &act_ipt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -static struct tc_action_ops act_xt_ops = { - .kind = "xt", - .id = TCA_ID_XT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_xt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int xt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); - - return tc_action_net_init(net, tn, &act_xt_ops); -} - -static void __net_exit xt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_xt_ops.net_id); -} - -static struct pernet_operations xt_net_ops = { - .init = xt_init_net, - .exit_batch = xt_exit_net, - .id = &act_xt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); -MODULE_DESCRIPTION("Iptables target actions"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("act_xt"); - -static int __init ipt_init_module(void) -{ - int ret1, ret2; - - ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops); - if (ret1 < 0) - pr_err("Failed to load xt action\n"); - - ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops); - if (ret2 < 0) - pr_err("Failed to load ipt action\n"); - - if (ret1 < 0 && ret2 < 0) { - return ret1; - } else - return 0; -} - -static void __exit ipt_cleanup_module(void) -{ - tcf_unregister_action(&act_ipt_ops, &ipt_net_ops); - tcf_unregister_action(&act_xt_ops, &xt_net_ops); -} - -module_init(ipt_init_module); -module_exit(ipt_cleanup_module); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a711c184c29..12386f590b0f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -85,10 +85,21 @@ static void tcf_mirred_release(struct tc_action *a) static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, + [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1), }; static struct tc_action_ops act_mirred_ops; +static void tcf_mirred_replace_dev(struct tcf_mirred *m, + struct net_device *ndev) +{ + struct net_device *odev; + + odev = rcu_replace_pointer(m->tcfm_dev, ndev, + lockdep_is_held(&m->tcf_lock)); + netdev_put(odev, &m->tcfm_dev_tracker); +} + static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, @@ -124,7 +135,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; + + if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot specify Block ID and dev simultaneously"); + if (exists) + tcf_idr_release(*a, bind); + else + tcf_idr_cleanup(tn, index); + + return -EINVAL; + } switch (parm->eaction) { case TCA_EGRESS_MIRROR: @@ -142,9 +164,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (!exists) { - if (!parm->ifindex) { + if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) { tcf_idr_cleanup(tn, index); - NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); + NL_SET_ERR_MSG_MOD(extack, + "Must specify device or block"); return -EINVAL; } ret = tcf_idr_create_from_flags(tn, index, est, a, @@ -170,7 +193,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { - struct net_device *odev, *ndev; + struct net_device *ndev; ndev = dev_get_by_index(net, parm->ifindex); if (!ndev) { @@ -179,11 +202,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(ndev); - odev = rcu_replace_pointer(m->tcfm_dev, ndev, - lockdep_is_held(&m->tcf_lock)); - netdev_put(odev, &m->tcfm_dev_tracker); + tcf_mirred_replace_dev(m, ndev); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; + m->tcfm_blockid = 0; + } else if (tb[TCA_MIRRED_BLOCKID]) { + tcf_mirred_replace_dev(m, NULL); + m->tcfm_mac_header_xmit = false; + m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]); } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); m->tcfm_eaction = parm->eaction; @@ -225,48 +251,26 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) +static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, + struct net_device *dev, + const bool m_mac_header_xmit, int m_eaction, + int retval) { - struct tcf_mirred *m = to_mirred(a); - struct sk_buff *skb2 = skb; - bool m_mac_header_xmit; - struct net_device *dev; - unsigned int nest_level; - int retval, err = 0; - bool use_reinsert; + struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; - int m_eaction; + bool dont_clone; int mac_len; bool at_nh; + int err; - nest_level = __this_cpu_inc_return(mirred_nest_level); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { - net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", - netdev_name(skb->dev)); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_SHOT; - } - - tcf_lastuse_update(&m->tcf_tm); - tcf_action_update_bstats(&m->common, skb); - - m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); - retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference_bh(m->tcfm_dev); - if (unlikely(!dev)) { - pr_notice_once("tc mirred: target device is gone\n"); - goto out; - } - + is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); + err = -ENODEV; goto out; } @@ -274,61 +278,188 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - is_redirect = tcf_mirred_is_act_redirect(m_eaction); at_ingress = skb_at_tc_ingress(skb); - use_reinsert = at_ingress && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!use_reinsert) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) { + err = -ENOMEM; goto out; + } } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ - nf_reset_ct(skb2); + nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ - skb_dst_drop(skb2); + skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { - mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + mac_len = at_ingress ? skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ - skb_pull_rcsum(skb2, mac_len); + skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ - skb_push_rcsum(skb2, mac_len); + skb_push_rcsum(skb_to_send, mac_len); } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; + skb_to_send->skb_iif = skb->dev->ifindex; + skb_to_send->dev = dev; - /* mirror is always swallowed */ if (is_redirect) { - skb_set_redirected(skb2, skb2->tc_at_ingress); - - /* let's the caller reinsert the packet, if possible */ - if (use_reinsert) { - err = tcf_mirred_forward(want_ingress, skb); - if (err) - tcf_action_inc_overlimit_qstats(&m->common); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_CONSUMED; - } + if (skb == skb_to_send) + retval = TC_ACT_CONSUMED; + + skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + + err = tcf_mirred_forward(want_ingress, skb_to_send); + } else { + err = tcf_mirred_forward(want_ingress, skb_to_send); } - err = tcf_mirred_forward(want_ingress, skb2); if (err) { out: tcf_action_inc_overlimit_qstats(&m->common); - if (tcf_mirred_is_act_redirect(m_eaction)) + if (is_redirect) retval = TC_ACT_SHOT; } + + return retval; +} + +static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, + struct tcf_block *block, int m_eaction, + const u32 exception_ifindex, int retval) +{ + struct net_device *dev_prev = NULL; + struct net_device *dev = NULL; + unsigned long index; + int mirred_eaction; + + mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ? + TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR; + + xa_for_each(&block->ports, index, dev) { + if (index == exception_ifindex) + continue; + + if (!dev_prev) + goto assign_prev; + + tcf_mirred_to_dev(skb, m, dev_prev, + dev_is_mac_header_xmit(dev), + mirred_eaction, retval); +assign_prev: + dev_prev = dev; + } + + if (dev_prev) + return tcf_mirred_to_dev(skb, m, dev_prev, + dev_is_mac_header_xmit(dev_prev), + m_eaction, retval); + + return retval; +} + +static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, + struct tcf_block *block, int m_eaction, + const u32 exception_ifindex, int retval) +{ + struct net_device *dev = NULL; + unsigned long index; + + xa_for_each(&block->ports, index, dev) { + if (index == exception_ifindex) + continue; + + tcf_mirred_to_dev(skb, m, dev, + dev_is_mac_header_xmit(dev), + m_eaction, retval); + } + + return retval; +} + +static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, + const u32 blockid, struct tcf_result *res, + int retval) +{ + const u32 exception_ifindex = skb->dev->ifindex; + struct tcf_block *block; + bool is_redirect; + int m_eaction; + + m_eaction = READ_ONCE(m->tcfm_eaction); + is_redirect = tcf_mirred_is_act_redirect(m_eaction); + + /* we are already under rcu protection, so can call block lookup + * directly. + */ + block = tcf_block_lookup(dev_net(skb->dev), blockid); + if (!block || xa_empty(&block->ports)) { + tcf_action_inc_overlimit_qstats(&m->common); + return retval; + } + + if (is_redirect) + return tcf_blockcast_redir(skb, m, block, m_eaction, + exception_ifindex, retval); + + /* If it's not redirect, it is mirror */ + return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex, + retval); +} + +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_mirred *m = to_mirred(a); + int retval = READ_ONCE(m->tcf_action); + unsigned int nest_level; + bool m_mac_header_xmit; + struct net_device *dev; + int m_eaction; + u32 blockid; + + nest_level = __this_cpu_inc_return(mirred_nest_level); + if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + retval = TC_ACT_SHOT; + goto dec_nest_level; + } + + tcf_lastuse_update(&m->tcf_tm); + tcf_action_update_bstats(&m->common, skb); + + blockid = READ_ONCE(m->tcfm_blockid); + if (blockid) { + retval = tcf_blockcast(skb, m, blockid, res, retval); + goto dec_nest_level; + } + + dev = rcu_dereference_bh(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); + tcf_action_inc_overlimit_qstats(&m->common); + goto dec_nest_level; + } + + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); + + retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, + retval); + +dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; @@ -356,6 +487,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, }; struct net_device *dev; struct tcf_t t; + u32 blockid; spin_lock_bh(&m->tcf_lock); opt.action = m->tcf_action; @@ -367,6 +499,10 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + blockid = m->tcfm_blockid; + if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid)) + goto nla_put_failure; + tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; @@ -397,6 +533,8 @@ static int mirred_device_event(struct notifier_block *unused, * net_device are already rcu protected. */ RCU_INIT_POINTER(m->tcfm_dev, NULL); + } else if (m->tcfm_blockid) { + m->tcfm_blockid = 0; } spin_unlock_bh(&m->tcf_lock); } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 1010dc632874..34b8edb6cc77 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -195,7 +195,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 4184af5abbf3..a180e724634e 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -69,7 +69,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 1ef8fcfa9997..2ef22969f274 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -202,7 +202,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { ret = -EEXIST; goto out_release; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f3121c5a85e9..e119b4a3db9f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -77,7 +77,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, NULL, a, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 4c670e7568dc..c5c61efe6db4 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -66,7 +66,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, est, a, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4b84514534f3..0a3e92888295 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -118,7 +118,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (tb[TCA_DEF_DATA] == NULL) { if (exists) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ce7008cf291c..754f78b35bb8 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -209,7 +209,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!flags) { if (exists) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index dffa990a9629..bcb673ab0008 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -157,7 +157,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!lflags) { if (exists) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0c8aa7e686ea..300b08aa8283 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -401,7 +401,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; switch (parm->t_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 0251442f5f29..836183011a7c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -151,7 +151,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; switch (parm->v_action) { case TCA_VLAN_ACT_POP: diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc1c19a25882..adf5de1ff773 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); mutex_destroy(&block->proto_destroy_lock); + xa_destroy(&block->ports); kfree_rcu(block, rcu); } @@ -1002,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; + xa_init(&block->ports); /* Don't store q pointer for blocks which are shared */ if (!tcf_block_shared(block)) @@ -1009,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return block; } -static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) +struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) { struct tcf_net *tn = net_generic(net, tcf_net_id); return idr_find(&tn->idr, block_index); } +EXPORT_SYMBOL(tcf_block_lookup); static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) { @@ -1657,7 +1660,6 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { - u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1682,13 +1684,15 @@ reclassify: */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || !tp->ops->get_exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } exts = tp->ops->get_exts(tp, n->handle); if (unlikely(!exts || n->exts != exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1712,18 +1716,13 @@ reclassify: goto reset; } #endif - if (err >= 0) { - /* Policy drop or drop reason is over-written by - * classifiers with a bogus value(0) */ - if (err == TC_ACT_SHOT && - res->drop_reason == SKB_NOT_DROPPED_YET) - tcf_set_drop_reason(res, orig_reason); + if (err >= 0) return err; - } } if (unlikely(n)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1735,7 +1734,8 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return TC_ACT_SHOT; } @@ -1773,7 +1773,8 @@ int tcf_classify(struct sk_buff *skb, n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); if (!n) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1784,7 +1785,9 @@ int tcf_classify(struct sk_buff *skb, fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); + return TC_ACT_SHOT; } @@ -1806,10 +1809,9 @@ int tcf_classify(struct sk_buff *skb, ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } - ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; @@ -2733,6 +2735,7 @@ errout: } static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = { + [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE), }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e9eaf637220e..2a2a48838eb9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1003,6 +1003,32 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) return false; } +static int qdisc_get_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *q, + struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (!tc_qdisc_dump_ignore(q, false)) { + if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0, + RTM_NEWQDISC, extack) < 0) + goto err_out; + } + + if (skb->len) + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + +err_out: + kfree_skb(skb); + return -EINVAL; +} + static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, @@ -1011,6 +1037,9 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1180,6 +1209,43 @@ skip: return 0; } +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops; + struct tcf_block *block; + int err; + + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL); + if (block) { + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG(extack, + "ingress block dev insert failed"); + return err; + } + } + + block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL); + if (block) { + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG(extack, + "Egress block dev insert failed"); + goto err_out; + } + } + + return 0; + +err_out: + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL); + if (block) + xa_erase(&block->ports, dev->ifindex); + + return err; +} + static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, struct netlink_ext_ack *extack) { @@ -1350,6 +1416,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev, qdisc_hash_add(sch, false); trace_qdisc_create(ops, dev, parent); + err = qdisc_block_add_dev(sch, dev, extack); + if (err) + goto err_out4; + return sch; err_out4: @@ -1542,7 +1612,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, if (err != 0) return err; } else { - qdisc_notify(net, skb, n, clid, NULL, q, NULL); + qdisc_get_notify(net, skb, n, clid, q, NULL); } return 0; } @@ -1936,6 +2006,9 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1949,6 +2022,27 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, n->nlmsg_flags & NLM_F_ECHO); } +static int tclass_get_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS, + extack) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, @@ -1962,14 +2056,18 @@ static int tclass_del_notify(struct net *net, if (!cops->delete) return -EOPNOTSUPP; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; + if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, - RTM_DELTCLASS, extack) < 0) { - kfree_skb(skb); - return -EINVAL; + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, + RTM_DELTCLASS, extack) < 0) { + kfree_skb(skb); + return -EINVAL; + } + } else { + skb = NULL; } err = cops->delete(q, cl, extack); @@ -1978,8 +2076,8 @@ static int tclass_del_notify(struct net *net, return err; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); return err; } @@ -2174,7 +2272,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: - err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack); + err = tclass_get_notify(net, skb, n, q, cl, extack); goto out; default: err = -EINVAL; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8dd0e5925342..e33568df97a5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1051,6 +1051,9 @@ static void qdisc_free_cb(struct rcu_head *head) static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; + struct net_device *dev = qdisc_dev(qdisc); + const struct Qdisc_class_ops *cops; + struct tcf_block *block; #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -1061,11 +1064,24 @@ static void __qdisc_destroy(struct Qdisc *qdisc) qdisc_reset(qdisc); + cops = ops->cl_ops; + if (ops->ingress_block_get) { + block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); + if (block) + xa_erase(&block->ports, dev->ifindex); + } + + if (ops->egress_block_get) { + block = cops->tcf_block(qdisc, TC_H_MIN_EGRESS, NULL); + if (block) + xa_erase(&block->ports, dev->ifindex); + } + if (ops->destroy) ops->destroy(qdisc); module_put(ops->owner); - netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(dev, &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fb02bbb4b34..6b9fcdb0952a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2102,6 +2102,10 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (sk_can_busy_loop(sk) && + skb_queue_empty_lockless(&sk->sk_receive_queue)) + sk_busy_loop(sk, flags & MSG_DONTWAIT); + lock_sock(sk); if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) && @@ -9046,12 +9050,6 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err) if (sk->sk_shutdown & RCV_SHUTDOWN) break; - if (sk_can_busy_loop(sk)) { - sk_busy_loop(sk, flags & MSG_DONTWAIT); - - if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) - continue; - } /* User doesn't want to wait. */ error = -EAGAIN; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7fc2f3c6d248..a2cb30af46cb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -677,8 +677,6 @@ static bool smc_isascii(char *hostname) static void smc_conn_save_peer_info_fce(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)clc; struct smc_clc_first_contact_ext *fce; int clc_v2_len; @@ -687,17 +685,15 @@ static void smc_conn_save_peer_info_fce(struct smc_sock *smc, return; if (smc->conn.lgr->is_smcd) { - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid, + memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid, SMC_MAX_EID_LEN); - clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, - d1); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1); } else { - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid, + memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid, SMC_MAX_EID_LEN); - clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, - r1); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1); } - fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len); + fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len); smc->conn.lgr->peer_os = fce->os_type; smc->conn.lgr->peer_smc_release = fce->release; if (smc_isascii(fce->hostname)) @@ -1048,7 +1044,8 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, { int rc = SMC_CLC_DECL_NOSMCDDEV; struct smcd_dev *smcd; - int i = 1; + int i = 1, entry = 1; + bool is_virtual; u16 chid; if (smcd_indicated(ini->smc_type_v1)) @@ -1060,14 +1057,23 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, chid = smc_ism_get_chid(smcd); if (!smc_find_ism_v2_is_unique_chid(chid, ini, i)) continue; + is_virtual = __smc_ism_is_virtual(chid); if (!smc_pnet_is_pnetid_set(smcd->pnetid) || smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { + if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES) + /* It's the last GID-CHID entry left in CLC + * Proposal SMC-Dv2 extension, but a virtual + * ISM device will take two entries. So give + * up it and try the next potential ISM device. + */ + continue; ini->ism_dev[i] = smcd; ini->ism_chid[i] = chid; ini->is_smcd = true; rc = 0; i++; - if (i > SMC_MAX_ISM_DEVS) + entry = is_virtual ? entry + 2 : entry + 1; + if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES) break; } } @@ -1149,13 +1155,13 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, } #define SMC_CLC_MAX_ACCEPT_LEN \ - (sizeof(struct smc_clc_msg_accept_confirm_v2) + \ + (sizeof(struct smc_clc_msg_accept_confirm) + \ sizeof(struct smc_clc_first_contact_ext_v2x) + \ sizeof(struct smc_clc_msg_trail)) /* CLC handshake during connect */ static int smc_connect_clc(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm_v2 *aclc2, + struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { int rc = 0; @@ -1165,7 +1171,7 @@ static int smc_connect_clc(struct smc_sock *smc, if (rc) return rc; /* receive SMC Accept CLC message */ - return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN, + return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } @@ -1201,10 +1207,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(clc_v2, false); + smc_get_clc_first_contact_ext(aclc, false); struct net *net = sock_net(&smc->sk); int rc; @@ -1327,10 +1331,7 @@ static int smc_connect_rdma(struct smc_sock *smc, } if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->r1.eid; + eid = aclc->r1.eid; if (ini->first_contact_local) smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist, link->smcibdev, link->gid); @@ -1371,7 +1372,7 @@ connect_abort: * Determine from the CHID of the received CLC ACCEPT the ISM device chosen. */ static int -smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, +smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { int i; @@ -1398,12 +1399,9 @@ static int smc_connect_ism(struct smc_sock *smc, ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; if (aclc->hdr.version == SMC_V2) { - struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - if (ini->first_contact_peer) { struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(aclc_v2, true); + smc_get_clc_first_contact_ext(aclc, true); ini->release_nr = fce->release; rc = smc_clc_clnt_v2x_features_validate(fce, ini); @@ -1411,11 +1409,16 @@ static int smc_connect_ism(struct smc_sock *smc, return rc; } - rc = smc_v2_determine_accepted_chid(aclc_v2, ini); + rc = smc_v2_determine_accepted_chid(aclc, ini); if (rc) return rc; + + if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected])) + ini->ism_peer_gid[ini->ism_selected].gid_ext = + ntohll(aclc->d1.gid_ext); + /* for non-virtual ISM devices, peer gid_ext remains 0. */ } - ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); + ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); @@ -1437,12 +1440,8 @@ static int smc_connect_ism(struct smc_sock *smc, smc_rx_init(smc); smc_tx_init(smc); - if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->d1.eid; - } + if (aclc->hdr.version > SMC_V1) + eid = aclc->d1.eid; rc = smc_clc_send_confirm(smc, ini->first_contact_local, aclc->hdr.version, eid, ini); @@ -1493,7 +1492,6 @@ static int smc_connect_check_aclc(struct smc_init_info *ini, static int __smc_connect(struct smc_sock *smc) { u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; - struct smc_clc_msg_accept_confirm_v2 *aclc2; struct smc_clc_msg_accept_confirm *aclc; struct smc_init_info *ini = NULL; u8 *buf = NULL; @@ -1541,11 +1539,10 @@ static int __smc_connect(struct smc_sock *smc) rc = SMC_CLC_DECL_MEM; goto fallback; } - aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; - aclc = (struct smc_clc_msg_accept_confirm *)aclc2; + aclc = (struct smc_clc_msg_accept_confirm *)buf; /* perform CLC handshake */ - rc = smc_connect_clc(smc, aclc2, ini); + rc = smc_connect_clc(smc, aclc, ini); if (rc) { /* -EAGAIN on timeout, see tcp_recvmsg() */ if (rc == -EAGAIN) { @@ -2106,7 +2103,8 @@ static bool smc_is_already_selected(struct smcd_dev *smcd, /* check for ISM devices matching proposed ISM devices */ static void smc_check_ism_v2_match(struct smc_init_info *ini, - u16 proposed_chid, u64 proposed_gid, + u16 proposed_chid, + struct smcd_gid *proposed_gid, unsigned int *matches) { struct smcd_dev *smcd; @@ -2118,7 +2116,11 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, continue; if (smc_ism_get_chid(smcd) == proposed_chid && !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { - ini->ism_peer_gid[*matches] = proposed_gid; + ini->ism_peer_gid[*matches].gid = proposed_gid->gid; + if (__smc_ism_is_virtual(proposed_chid)) + ini->ism_peer_gid[*matches].gid_ext = + proposed_gid->gid_ext; + /* non-virtual ISM's peer gid_ext remains 0. */ ini->ism_dev[*matches] = smcd; (*matches)++; break; @@ -2140,9 +2142,11 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, struct smc_clc_v2_extension *smc_v2_ext; struct smc_clc_msg_smcd *pclc_smcd; unsigned int matches = 0; + struct smcd_gid smcd_gid; u8 smcd_version; u8 *eid = NULL; int i, rc; + u16 chid; if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) goto not_found; @@ -2152,18 +2156,35 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); mutex_lock(&smcd_dev_list.mutex); - if (pclc_smcd->ism.chid) + if (pclc_smcd->ism.chid) { /* check for ISM device matching proposed native ISM device */ + smcd_gid.gid = ntohll(pclc_smcd->ism.gid); + smcd_gid.gid_ext = 0; smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), - ntohll(pclc_smcd->ism.gid), &matches); - for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) { + &smcd_gid, &matches); + } + for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) { /* check for ISM devices matching proposed non-native ISM * devices */ - smc_check_ism_v2_match(ini, - ntohs(smcd_v2_ext->gidchid[i - 1].chid), - ntohll(smcd_v2_ext->gidchid[i - 1].gid), - &matches); + smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid); + smcd_gid.gid_ext = 0; + chid = ntohs(smcd_v2_ext->gidchid[i].chid); + if (__smc_ism_is_virtual(chid)) { + if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt || + chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid)) + /* each virtual ISM device takes two GID-CHID + * entries and CHID of the second entry repeats + * that of the first entry. + * + * So check if the next GID-CHID entry exists + * and both two entries' CHIDs are the same. + */ + continue; + smcd_gid.gid_ext = + ntohll(smcd_v2_ext->gidchid[++i].gid); + } + smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches); } mutex_unlock(&smcd_dev_list.mutex); @@ -2212,7 +2233,8 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) goto not_found; ini->is_smcd = true; /* prepare ISM check */ - ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); + ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid); + ini->ism_peer_gid[0].gid_ext = 0; rc = smc_find_ism_device(new_smc, ini); if (rc) goto not_found; diff --git a/net/smc/smc.h b/net/smc/smc.h index cd51261b7d9e..df64efd2dee8 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -29,9 +29,6 @@ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ -#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM - * devices - */ #define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; @@ -58,6 +55,13 @@ enum smc_state { /* possible states of an SMC socket */ SMC_PROCESSABORT = 27, }; +enum smc_supplemental_features { + SMC_SPF_VIRT_ISM_DEV = 0, +}; + +#define SMC_FEATURE_MASK \ + (BIT(SMC_SPF_VIRT_ISM_DEV)) + struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 95e19aa3e769..9a13709bea1c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid) rc = 0; } } +#if IS_ENABLED(CONFIG_S390) if (!rc && !smc_clc_eid_table.ueid_cnt) { smc_clc_eid_table.seid_enabled = 1; rc = -EAGAIN; /* indicate success and enabling of seid */ } +#endif write_unlock(&smc_clc_eid_table.lock); return rc; } @@ -273,22 +275,30 @@ err: int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) { +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); smc_clc_eid_table.seid_enabled = 1; write_unlock(&smc_clc_eid_table.lock); return 0; +#else + return -EOPNOTSUPP; +#endif } int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) { int rc = 0; +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); if (!smc_clc_eid_table.ueid_cnt) rc = -ENOENT; else smc_clc_eid_table.seid_enabled = 0; write_unlock(&smc_clc_eid_table.lock); +#else + rc = -EOPNOTSUPP; +#endif return rc; } @@ -377,9 +387,9 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) /* check arriving CLC accept or confirm */ static bool -smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) +smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc) { - struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; + struct smc_clc_msg_hdr *hdr = &clc->hdr; if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) return false; @@ -418,15 +428,16 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) return true; } -static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, - struct smc_init_info *ini) +static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_init_info *ini) { - int ret = sizeof(*fce); + int ret = sizeof(*fce_v2x); - memset(fce, 0, sizeof(*fce)); - fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX; - fce->fce_v2_base.release = ini->release_nr; - memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname)); + memset(fce_v2x, 0, sizeof(*fce_v2x)); + fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX; + fce_v2x->fce_v2_base.release = ini->release_nr; + memcpy(fce_v2x->fce_v2_base.hostname, + smc_hostname, sizeof(smc_hostname)); if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) { ret = sizeof(struct smc_clc_first_contact_ext); goto out; @@ -434,9 +445,10 @@ static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, if (ini->release_nr >= SMC_RELEASE_1) { if (!ini->is_smcd) { - fce->max_conns = ini->max_conns; - fce->max_links = ini->max_links; + fce_v2x->max_conns = ini->max_conns; + fce_v2x->max_links = ini->max_links; } + fce_v2x->feature_mask = htons(ini->feature_mask); } out: @@ -448,7 +460,7 @@ out: */ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2; + struct smc_clc_msg_accept_confirm *clc; struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; @@ -466,12 +478,11 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: - clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; - if (!smc_clc_msg_acc_conf_valid(clc_v2)) + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (!smc_clc_msg_acc_conf_valid(clc)) return false; trl = (struct smc_clc_msg_trail *) - ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - - sizeof(*trl)); + ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -882,11 +893,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ETH_ALEN); } if (smcd_indicated(ini->smc_type_v1)) { + struct smcd_gid smcd_gid; + /* add SMC-D specifics */ if (ini->ism_dev[0]) { smcd = ini->ism_dev[0]; - pclc_smcd->ism.gid = - htonll(smcd->ops->get_local_gid(smcd)); + smcd->ops->get_local_gid(smcd, &smcd_gid); + pclc_smcd->ism.gid = htonll(smcd_gid.gid); pclc_smcd->ism.chid = htons(smc_ism_get_chid(ini->ism_dev[0])); } @@ -907,6 +920,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_smcd->v2_ext_offset = htons(v2_ext_offset); plen += sizeof(*v2_ext); + v2_ext->feature_mask = htons(SMC_FEATURE_MASK); read_lock(&smc_clc_eid_table.lock); v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; @@ -918,10 +932,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) read_unlock(&smc_clc_eid_table.lock); } if (smcd_indicated(ini->smc_type_v2)) { + struct smcd_gid smcd_gid; u8 *eid = NULL; + int entry = 0; v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; - v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + @@ -933,14 +948,26 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { smcd = ini->ism_dev[i]; - gidchids[i - 1].gid = - htonll(smcd->ops->get_local_gid(smcd)); - gidchids[i - 1].chid = + smcd->ops->get_local_gid(smcd, &smcd_gid); + gidchids[entry].chid = htons(smc_ism_get_chid(ini->ism_dev[i])); + gidchids[entry].gid = htonll(smcd_gid.gid); + if (smc_ism_is_virtual(smcd)) { + /* a virtual ISM device takes two + * entries. CHID of the second entry + * repeats that of the first entry. + */ + gidchids[entry + 1].chid = + gidchids[entry].chid; + gidchids[entry + 1].gid = + htonll(smcd_gid.gid_ext); + entry++; + } + entry++; } - plen += ini->ism_offered_cnt * - sizeof(struct smc_clc_smcd_gid_chid); + plen += entry * sizeof(struct smc_clc_smcd_gid_chid); } + v2_ext->hdr.ism_gid_cnt = entry; } if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); @@ -976,7 +1003,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) vec[i++].iov_len = sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { vec[i].iov_base = gidchids; - vec[i++].iov_len = ini->ism_offered_cnt * + vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt * sizeof(struct smc_clc_smcd_gid_chid); } } @@ -997,109 +1024,143 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return reason_code; } +static void +smcd_clc_prep_confirm_accept(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc, + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini, + int *fce_len, + struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_clc_msg_trail *trl) +{ + struct smcd_dev *smcd = conn->lgr->smcd; + struct smcd_gid smcd_gid; + u16 chid; + int len; + + /* SMC-D specific settings */ + memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + smcd->ops->get_local_gid(smcd, &smcd_gid); + clc->hdr.typev1 = SMC_TYPE_D; + clc->d0.gid = htonll(smcd_gid.gid); + clc->d0.token = htonll(conn->rmb_desc->token); + clc->d0.dmbe_size = conn->rmbe_size_comp; + clc->d0.dmbe_idx = 0; + memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + } else { + chid = smc_ism_get_chid(smcd); + clc->d1.chid = htons(chid); + if (eid && eid[0]) + memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN); + if (__smc_ism_is_virtual(chid)) + clc->d1.gid_ext = htonll(smcd_gid.gid_ext); + len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); + len += *fce_len; + } + clc->hdr.length = htons(len); + } + memcpy(trl->eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); +} + +static void +smcr_clc_prep_confirm_accept(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc, + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini, + int *fce_len, + struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_clc_fce_gid_ext *gle, + struct smc_clc_msg_trail *trl) +{ + struct smc_link *link = conn->lnk; + int len; + + /* SMC-R specific settings */ + memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + clc->hdr.typev1 = SMC_TYPE_R; + memcpy(clc->r0.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); + memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(clc->r0.qpn, link->roce_qp->qp_num); + clc->r0.rmb_rkey = + htonl(conn->rmb_desc->mr[link->link_idx]->rkey); + clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ + clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); + switch (clc->hdr.type) { + case SMC_CLC_ACCEPT: + clc->r0.qp_mtu = link->path_mtu; + break; + case SMC_CLC_CONFIRM: + clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); + break; + } + clc->r0.rmbe_size = conn->rmbe_size_comp; + clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? + cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : + cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[link->link_idx].sgl)); + hton24(clc->r0.psn, link->psn_initial); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + } else { + if (eid && eid[0]) + memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN); + len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); + len += *fce_len; + fce_v2x->fce_v2_base.v2_direct = + !link->lgr->uses_gateway; + if (clc->hdr.type == SMC_CLC_CONFIRM) { + memset(gle, 0, sizeof(*gle)); + gle->gid_cnt = ini->smcrv2.gidlist.len; + len += sizeof(*gle); + len += gle->gid_cnt * sizeof(gle->gid[0]); + } + } + clc->hdr.length = htons(len); + } + memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); +} + /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm_v2 *clc_v2, + struct smc_clc_msg_accept_confirm *clc, int first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { + struct smc_clc_first_contact_ext_v2x fce_v2x; struct smc_connection *conn = &smc->conn; - struct smc_clc_first_contact_ext_v2x fce; - struct smcd_dev *smcd = conn->lgr->smcd; - struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; - int i, len, fce_len; + int i, fce_len; struct kvec vec[5]; struct msghdr msg; /* send SMC Confirm CLC msg */ - clc = (struct smc_clc_msg_accept_confirm *)clc_v2; clc->hdr.version = version; /* SMC version */ if (first_contact) clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; - if (conn->lgr->is_smcd) { - /* SMC-D specific settings */ - memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); - clc->d0.token = htonll(conn->rmb_desc->token); - clc->d0.dmbe_size = conn->rmbe_size_comp; - clc->d0.dmbe_idx = 0; - memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); - } else { - clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); - if (eid && eid[0]) - memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); - len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - fce_len = smc_clc_fill_fce(&fce, ini); - len += fce_len; - } - clc_v2->hdr.length = htons(len); - } - memcpy(trl.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - } else { - struct smc_link *link = conn->lnk; - - /* SMC-R specific settings */ - memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - clc->hdr.typev1 = SMC_TYPE_R; - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - memcpy(clc->r0.lcl.id_for_peer, local_systemid, - sizeof(local_systemid)); - memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); - memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], - ETH_ALEN); - hton24(clc->r0.qpn, link->roce_qp->qp_num); - clc->r0.rmb_rkey = - htonl(conn->rmb_desc->mr[link->link_idx]->rkey); - clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ - clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); - switch (clc->hdr.type) { - case SMC_CLC_ACCEPT: - clc->r0.qp_mtu = link->path_mtu; - break; - case SMC_CLC_CONFIRM: - clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); - break; - } - clc->r0.rmbe_size = conn->rmbe_size_comp; - clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? - cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : - cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[link->link_idx].sgl)); - hton24(clc->r0.psn, link->psn_initial); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - } else { - if (eid && eid[0]) - memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); - len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - fce_len = smc_clc_fill_fce(&fce, ini); - len += fce_len; - fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway; - if (clc->hdr.type == SMC_CLC_CONFIRM) { - memset(&gle, 0, sizeof(gle)); - gle.gid_cnt = ini->smcrv2.gidlist.len; - len += sizeof(gle); - len += gle.gid_cnt * sizeof(gle.gid[0]); - } - } - clc_v2->hdr.length = htons(len); - } - memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - } - + if (conn->lgr->is_smcd) + smcd_clc_prep_confirm_accept(conn, clc, first_contact, + version, eid, ini, &fce_len, + &fce_v2x, &trl); + else + smcr_clc_prep_confirm_accept(conn, clc, first_contact, + version, eid, ini, &fce_len, + &fce_v2x, &gle, &trl); memset(&msg, 0, sizeof(msg)); i = 0; - vec[i].iov_base = clc_v2; + vec[i].iov_base = clc; if (version > SMC_V1) vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : @@ -1111,7 +1172,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, SMCR_CLC_ACCEPT_CONFIRM_LEN) - sizeof(trl); if (version > SMC_V1 && first_contact) { - vec[i].iov_base = &fce; + vec[i].iov_base = &fce_v2x; vec[i++].iov_len = fce_len; if (!conn->lgr->is_smcd) { if (clc->hdr.type == SMC_CLC_CONFIRM) { @@ -1133,16 +1194,16 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 cclc_v2; + struct smc_clc_msg_accept_confirm cclc; int reason_code = 0; int len; /* send SMC Confirm CLC msg */ - memset(&cclc_v2, 0, sizeof(cclc_v2)); - cclc_v2.hdr.type = SMC_CLC_CONFIRM; - len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, + memset(&cclc, 0, sizeof(cclc)); + cclc.hdr.type = SMC_CLC_CONFIRM; + len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact, version, eid, ini); - if (len < ntohs(cclc_v2.hdr.length)) { + if (len < ntohs(cclc.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -1158,14 +1219,14 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 aclc_v2; + struct smc_clc_msg_accept_confirm aclc; int len; - memset(&aclc_v2, 0, sizeof(aclc_v2)); - aclc_v2.hdr.type = SMC_CLC_ACCEPT; - len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, + memset(&aclc, 0, sizeof(aclc)); + aclc.hdr.type = SMC_CLC_ACCEPT; + len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact, version, negotiated_eid, ini); - if (len < ntohs(aclc_v2.hdr.length)) + if (len < ntohs(aclc.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; @@ -1180,6 +1241,7 @@ int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, ini->max_conns = SMC_CONN_PER_LGR_MAX; ini->max_links = SMC_LINKS_ADD_LNK_MAX; + ini->feature_mask = SMC_FEATURE_MASK; if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) || ini->release_nr < SMC_RELEASE_1) @@ -1223,6 +1285,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, return SMC_CLC_DECL_MAXLINKERR; ini->max_links = fce_v2x->max_links; } + /* common supplemental features of server and client */ + ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK; return 0; } @@ -1230,10 +1294,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)cclc; struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd); + smc_get_clc_first_contact_ext(cclc, ini->is_smcd); struct smc_clc_first_contact_ext_v2x *fce_v2x = (struct smc_clc_first_contact_ext_v2x *)fce; @@ -1253,6 +1315,8 @@ int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, if (fce_v2x->max_links != ini->max_links) return SMC_CLC_DECL_MAXLINKERR; } + /* common supplemental features returned by client */ + ini->feature_mask = ntohs(fce_v2x->feature_mask); return 0; } @@ -1274,7 +1338,11 @@ void __init smc_clc_init(void) INIT_LIST_HEAD(&smc_clc_eid_table.list); rwlock_init(&smc_clc_eid_table.lock); smc_clc_eid_table.ueid_cnt = 0; +#if IS_ENABLED(CONFIG_S390) smc_clc_eid_table.seid_enabled = 1; +#else + smc_clc_eid_table.seid_enabled = 0; +#endif } void smc_clc_exit(void) diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 1697b84d85be..a9f9bdd26dcd 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -138,7 +138,8 @@ struct smc_clc_v2_extension { u8 roce[16]; /* RoCEv2 GID */ u8 max_conns; u8 max_links; - u8 reserved[14]; + __be16 feature_mask; + u8 reserved[12]; u8 user_eids[][SMC_MAX_EID_LEN]; }; @@ -171,6 +172,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ #define SMC_CLC_MAX_V6_PREFIX 8 #define SMC_CLC_MAX_UEID 8 +#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC + * proposal SMC-Dv2 extension. + * each ISM device takes one entry and + * each virtual ISM takes two entries. + */ struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal pclc_base; @@ -180,7 +186,8 @@ struct smc_clc_msg_proposal_area { struct smc_clc_v2_extension pclc_v2_ext; u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN]; struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; - struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; + struct smc_clc_smcd_gid_chid + pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES]; struct smc_clc_msg_trail pclc_trl; }; @@ -240,9 +247,14 @@ struct smc_clc_first_contact_ext { struct smc_clc_first_contact_ext_v2x { struct smc_clc_first_contact_ext fce_v2_base; - u8 max_conns; /* for SMC-R only */ - u8 max_links; /* for SMC-R only */ - u8 reserved3[2]; + union { + struct { + u8 max_conns; /* for SMC-R only */ + u8 max_links; /* for SMC-R only */ + }; + u8 reserved3[2]; /* for SMC-D only */ + }; + __be16 feature_mask; __be32 vendor_exp_options; u8 reserved4[8]; } __packed; /* format defined in @@ -259,28 +271,21 @@ struct smc_clc_fce_gid_ext { struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { - struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ - struct { /* SMC-D */ - struct smcd_clc_msg_accept_confirm_common d0; - u32 reserved5[3]; - }; - }; -} __packed; /* format defined in RFC7609 */ - -struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ - struct smc_clc_msg_hdr hdr; - union { struct { /* SMC-R */ struct smcr_clc_msg_accept_confirm r0; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved6[8]; - } r1; + struct { /* v2 only */ + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved6[8]; + } __packed r1; + }; struct { /* SMC-D */ struct smcd_clc_msg_accept_confirm_common d0; - __be16 chid; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved5[8]; - } d1; + struct { /* v2 only, but 12 bytes reserved in v1 */ + __be16 chid; + u8 eid[SMC_MAX_EID_LEN]; + __be64 gid_ext; + } __packed d1; + }; }; }; @@ -389,24 +394,23 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) } static inline struct smc_clc_first_contact_ext * -smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2, +smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm *clc, bool is_smcd) { int clc_v2_len; - if (clc_v2->hdr.version == SMC_V1 || - !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + if (clc->hdr.version == SMC_V1 || + !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) return NULL; if (is_smcd) clc_v2_len = - offsetofend(struct smc_clc_msg_accept_confirm_v2, d1); + offsetofend(struct smc_clc_msg_accept_confirm, d1); else clc_v2_len = - offsetofend(struct smc_clc_msg_accept_confirm_v2, r1); + offsetofend(struct smc_clc_msg_accept_confirm, r1); - return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + - clc_v2_len); + return (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len); } struct smcd_dev; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d520ee62c8ec..95cc95458e2d 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -506,6 +506,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, { char smc_pnet[SMC_MAX_PNETID_LEN + 1]; struct smcd_dev *smcd = lgr->smcd; + struct smcd_gid smcd_gid; struct nlattr *attrs; void *nlh; @@ -521,13 +522,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id))) goto errattr; + smcd->ops->get_local_gid(smcd, &smcd_gid); if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, - smcd->ops->get_local_gid(smcd), - SMC_NLA_LGR_D_PAD)) + smcd_gid.gid, SMC_NLA_LGR_D_PAD)) goto errattr; - if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid, + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID, + smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD)) + goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid, SMC_NLA_LGR_D_PAD)) goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID, + lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD)) + goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id)) goto errattr; if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num)) @@ -876,7 +883,10 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) /* SMC-D specific settings */ smcd = ini->ism_dev[ini->ism_selected]; get_device(smcd->ops->get_dev(smcd)); - lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; + lgr->peer_gid.gid = + ini->ism_peer_gid[ini->ism_selected].gid; + lgr->peer_gid.gid_ext = + ini->ism_peer_gid[ini->ism_selected].gid_ext; lgr->smcd = ini->ism_dev[ini->ism_selected]; lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; @@ -1514,7 +1524,8 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr) } /* Called when peer lgr shutdown (regularly or abnormally) is received */ -void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) +void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, + unsigned short vlan) { struct smc_link_group *lgr, *l; LIST_HEAD(lgr_free_list); @@ -1522,9 +1533,12 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) /* run common cleanup function and build free list */ spin_lock_bh(&dev->lgr_lock); list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { - if ((!peer_gid || lgr->peer_gid == peer_gid) && + if ((!peer_gid->gid || + (lgr->peer_gid.gid == peer_gid->gid && + !smc_ism_is_virtual(dev) ? 1 : + lgr->peer_gid.gid_ext == peer_gid->gid_ext)) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { - if (peer_gid) /* peer triggered termination */ + if (peer_gid->gid) /* peer triggered termination */ lgr->peer_shutdown = 1; list_move(&lgr->list, &lgr_free_list); lgr->freeing = 1; @@ -1860,9 +1874,12 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, } static bool smcd_lgr_match(struct smc_link_group *lgr, - struct smcd_dev *smcismdev, u64 peer_gid) + struct smcd_dev *smcismdev, + struct smcd_gid *peer_gid) { - return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; + return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && + smc_ism_is_virtual(smcismdev) ? + (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; } /* create a new SMC connection (and a new link group if necessary) */ @@ -1892,7 +1909,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], - ini->ism_peer_gid[ini->ism_selected]) : + &ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->smcr_version, ini->peer_systemid, ini->peer_gid, ini->peer_mac, role, diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 120027d40469..1f175376037b 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -17,9 +17,11 @@ #include <linux/pci.h> #include <rdma/ib_verbs.h> #include <net/genetlink.h> +#include <net/smc.h> #include "smc.h" #include "smc_ib.h" +#include "smc_clc.h" #define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */ #define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */ @@ -355,7 +357,7 @@ struct smc_link_group { /* max links can be added in lgr */ }; struct { /* SMC-D */ - u64 peer_gid; + struct smcd_gid peer_gid; /* Peer GID (remote) */ struct smcd_dev *smcd; /* ISM device for VLAN reg. */ @@ -392,6 +394,11 @@ struct smc_init_info_smcrv2 { struct smc_gidlist gidlist; }; +#define SMC_MAX_V2_ISM_DEVS SMCD_CLC_MAX_V2_GID_ENTRIES + /* max # of proposed non-native ISM devices, + * which can't exceed the max # of CHID-GID + * entries in CLC proposal SMC-Dv2 extension. + */ struct smc_init_info { u8 is_smcd; u8 smc_type_v1; @@ -401,6 +408,7 @@ struct smc_init_info { u8 max_links; u8 first_contact_peer; u8 first_contact_local; + u16 feature_mask; unsigned short vlan_id; u32 rc; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -416,9 +424,9 @@ struct smc_init_info { u32 ib_clcqpn; struct smc_init_info_smcrv2 smcrv2; /* SMC-D */ - u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; - struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; - u16 ism_chid[SMC_MAX_ISM_DEVS + 1]; + struct smcd_gid ism_peer_gid[SMC_MAX_V2_ISM_DEVS + 1]; + struct smcd_dev *ism_dev[SMC_MAX_V2_ISM_DEVS + 1]; + u16 ism_chid[SMC_MAX_V2_ISM_DEVS + 1]; u8 ism_offered_cnt; /* # of ISM devices offered */ u8 ism_selected; /* index of selected ISM dev*/ u8 smcd_version; @@ -544,7 +552,7 @@ void smc_lgr_hold(struct smc_link_group *lgr); void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); -void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, +void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, unsigned short vlan); void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index a584613aca12..3fbe14e09ad8 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -21,6 +21,7 @@ #include "smc.h" #include "smc_core.h" +#include "smc_ism.h" struct smc_diag_dump_ctx { int pos[2]; @@ -168,12 +169,16 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; + struct smcd_gid smcd_gid; memset(&dinfo, 0, sizeof(dinfo)); dinfo.linkid = *((u32 *)conn->lgr->id); - dinfo.peer_gid = conn->lgr->peer_gid; - dinfo.my_gid = smcd->ops->get_local_gid(smcd); + dinfo.peer_gid = conn->lgr->peer_gid.gid; + dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext; + smcd->ops->get_local_gid(smcd, &smcd_gid); + dinfo.my_gid = smcd_gid.gid; + dinfo.my_gid_ext = smcd_gid.gid_ext; dinfo.token = conn->rmb_desc->token; dinfo.peer_token = conn->peer_token; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index fbee2493091f..ac88de2a06a0 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -43,8 +43,30 @@ static struct ism_client smc_ism_client = { }; #endif +static void smc_ism_create_system_eid(void) +{ + struct smc_ism_seid *seid = + (struct smc_ism_seid *)smc_ism_v2_system_eid; +#if IS_ENABLED(CONFIG_S390) + struct cpuid id; + u16 ident_tail; + char tmp[5]; + + memcpy(seid->seid_string, "IBM-SYSZ-ISMSEID00000000", 24); + get_cpu_id(&id); + ident_tail = (u16)(id.ident & SMC_ISM_IDENT_MASK); + snprintf(tmp, 5, "%04X", ident_tail); + memcpy(seid->serial_number, tmp, 4); + snprintf(tmp, 5, "%04X", id.machine); + memcpy(seid->type, tmp, 4); +#else + memset(seid, 0, SMC_MAX_EID_LEN); +#endif +} + /* Test if an ISM communication is possible - same CPC */ -int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) +int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id, + struct smcd_dev *smcd) { return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, vlan_id); @@ -208,7 +230,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, dmb.dmb_len = dmb_len; dmb.sba_idx = dmb_desc->sba_idx; dmb.vlan_id = lgr->vlan_id; - dmb.rgid = lgr->peer_gid; + dmb.rgid = lgr->peer_gid.gid; rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client); if (!rc) { dmb_desc->sba_idx = dmb.sba_idx; @@ -340,18 +362,20 @@ union smcd_sw_event_info { static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) { + struct smcd_gid peer_gid = { .gid = wrk->event.tok, + .gid_ext = 0 }; union smcd_sw_event_info ev_info; ev_info.info = wrk->event.info; switch (wrk->event.code) { case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */ - smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id); + smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id); break; case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ if (ev_info.code == ISM_EVENT_REQUEST) { ev_info.code = ISM_EVENT_RESPONSE; wrk->smcd->ops->signal_event(wrk->smcd, - wrk->event.tok, + &peer_gid, ISM_EVENT_REQUEST_IR, ISM_EVENT_CODE_TESTLINK, ev_info.info); @@ -365,10 +389,12 @@ static void smc_ism_event_work(struct work_struct *work) { struct smc_ism_event_work *wrk = container_of(work, struct smc_ism_event_work, work); + struct smcd_gid smcd_gid = { .gid = wrk->event.tok, + .gid_ext = 0 }; switch (wrk->event.type) { case ISM_EVENT_GID: /* GID event, token is peer GID */ - smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK); + smc_smcd_terminate(wrk->smcd, &smcd_gid, VLAN_VID_MASK); break; case ISM_EVENT_DMB: break; @@ -426,14 +452,8 @@ static void smcd_register_dev(struct ism_dev *ism) mutex_lock(&smcd_dev_list.mutex); if (list_empty(&smcd_dev_list.list)) { - u8 *system_eid = NULL; - - system_eid = smcd->ops->get_system_eid(); - if (smcd->ops->supports_v2()) { + if (smcd->ops->supports_v2()) smc_ism_v2_capable = true; - memcpy(smc_ism_v2_system_eid, system_eid, - SMC_MAX_EID_LEN); - } } /* sort list: devices without pnetid before devices with pnetid */ if (smcd->pnetid[0]) @@ -525,7 +545,7 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr) memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE); ev_info.vlan_id = lgr->vlan_id; ev_info.code = ISM_EVENT_REQUEST; - rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid, + rc = lgr->smcd->ops->signal_event(lgr->smcd, &lgr->peer_gid, ISM_EVENT_REQUEST_IR, ISM_EVENT_CODE_SHUTDOWN, ev_info.info); @@ -537,10 +557,10 @@ int smc_ism_init(void) { int rc = 0; -#if IS_ENABLED(CONFIG_ISM) smc_ism_v2_capable = false; - memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN); + smc_ism_create_system_eid(); +#if IS_ENABLED(CONFIG_ISM) rc = ism_register_client(&smc_ism_client); #endif return rc; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 832b2f42d79f..ffff40c30a06 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -15,6 +15,9 @@ #include "smc.h" +#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00 +#define SMC_ISM_IDENT_MASK 0x00FFFF + struct smcd_dev_list { /* List of SMCD devices */ struct list_head list; struct mutex mutex; /* Protects list of devices */ @@ -28,9 +31,16 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */ refcount_t refcnt; /* Reference count */ }; +struct smc_ism_seid { + u8 seid_string[24]; + u8 serial_number[4]; + u8 type[4]; +}; + struct smcd_dev; -int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); +int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id, + struct smcd_dev *dev); void smc_ism_set_conn(struct smc_connection *conn); void smc_ism_unset_conn(struct smc_connection *conn); int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); @@ -56,4 +66,22 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok, return rc < 0 ? rc : 0; } +static inline bool __smc_ism_is_virtual(u16 chid) +{ + /* CHIDs in range of 0xFF00 to 0xFFFF are reserved + * for virtual ISM device. + * + * loopback-ism: 0xFFFF + * virtio-ism: 0xFF00 ~ 0xFFFE + */ + return ((chid & 0xFF00) == 0xFF00); +} + +static inline bool smc_ism_is_virtual(struct smcd_dev *smcd) +{ + u16 chid = smcd->ops->get_chid(smcd); + + return __smc_ism_is_virtual(chid); +} + #endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 11775401df68..9f2c58c5a86b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1103,8 +1103,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away && - (!ini->ism_peer_gid[0] || - !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, + (!ini->ism_peer_gid[0].gid || + !smc_ism_cantalk(&ini->ism_peer_gid[0], ini->vlan_id, ismdev))) { ini->ism_dev[0] = ismdev; break; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7bfe7d9a32aa..04534ea537c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index fee83d1024bc..1b71055fc391 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -654,9 +654,8 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array_node(GFP_KERNEL, - rqstp->rq_pool->sp_id, - pages, rqstp->rq_pages); + ret = alloc_pages_bulk_array(GFP_KERNEL, pages, + rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; diff --git a/net/tipc/link.c b/net/tipc/link.c index d0143823658d..0716eb5c8a31 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -82,10 +82,7 @@ struct tipc_stats { * struct tipc_link - TIPC link data structure * @addr: network address of link's peer node * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link @@ -94,31 +91,19 @@ struct tipc_stats { * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') * @mon_state: cookie with information needed by link monitor - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset * @mtu: current maximum packet size for this link * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer * @inputq: buffer queue for messages to be delivered upwards * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity * @session: session to be used by link * @snd_nxt_state: next send seq number diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index f495b9e5186b..1748268e0694 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -153,10 +153,10 @@ virtio_transport_send_pkt_work(struct work_struct *work) * 'virt_to_phys()' later to fill the buffer descriptor. * We don't touch memory at "virtual" address of this page. */ - va = page_to_virt(skb_frag->bv_page); + va = page_to_virt(skb_frag_page(skb_frag)); sg_init_one(sgs[out_sg], - va + skb_frag->bv_offset, - skb_frag->bv_len); + va + skb_frag_off(skb_frag), + skb_frag_size(skb_frag)); out_sg++; } } diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex new file mode 100644 index 000000000000..0d50369bede9 --- /dev/null +++ b/net/wireless/certs/wens.hex @@ -0,0 +1,87 @@ +/* Chen-Yu Tsai's regdb certificate */ +0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f, +0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab, +0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c, +0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d, +0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, +0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31, +0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03, +0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20, +0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31, +0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18, +0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30, +0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, +0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, +0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e, +0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06, +0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, +0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f, +0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01, +0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7, +0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef, +0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16, +0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe, +0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef, +0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f, +0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e, +0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c, +0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48, +0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf, +0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f, +0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e, +0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce, +0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03, +0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93, +0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a, +0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3, +0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb, +0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b, +0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6, +0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a, +0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32, +0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58, +0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b, +0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16, +0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e, +0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83, +0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91, +0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73, +0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36, +0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52, +0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78, +0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, +0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, +0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82, +0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74, +0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1, +0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82, +0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91, +0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f, +0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8, +0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b, +0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0, +0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c, +0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51, +0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b, +0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91, +0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6, +0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84, +0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb, +0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e, +0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63, +0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8, +0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28, +0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62, +0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8, +0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb, +0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5, +0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53, +0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee, +0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa, +0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86, +0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b, +0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4, +0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, +0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, +0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, +0x45, 0x4e, 0xc3, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index aad8ffeaee04..f7a7c7798c3b 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -704,7 +704,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rc = -EINVAL; } release_sock(sk); - SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); + net_dbg_ratelimited("x25_bind: socket is bound\n"); out: return rc; } @@ -1165,10 +1165,10 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; } - SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n"); + net_dbg_ratelimited("x25_sendmsg: sendto: Addresses built.\n"); /* Build a packet */ - SOCK_DEBUG(sk, "x25_sendmsg: sendto: building packet.\n"); + net_dbg_ratelimited("x25_sendmsg: sendto: building packet.\n"); if ((msg->msg_flags & MSG_OOB) && len > 32) len = 32; @@ -1187,7 +1187,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* * Put the data on the end */ - SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n"); + net_dbg_ratelimited("x25_sendmsg: Copying user data\n"); skb_reset_transport_header(skb); skb_put(skb, len); @@ -1211,7 +1211,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* * Push down the X.25 header */ - SOCK_DEBUG(sk, "x25_sendmsg: Building X.25 Header.\n"); + net_dbg_ratelimited("x25_sendmsg: Building X.25 Header.\n"); if (msg->msg_flags & MSG_OOB) { if (x25->neighbour->extended) { @@ -1245,8 +1245,8 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) skb->data[0] |= X25_Q_BIT; } - SOCK_DEBUG(sk, "x25_sendmsg: Built header.\n"); - SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n"); + net_dbg_ratelimited("x25_sendmsg: Built header.\n"); + net_dbg_ratelimited("x25_sendmsg: Transmitting buffer\n"); rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 8e1a49b0c0dc..6dadb217e101 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -282,7 +282,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, * They want reverse charging, we won't accept it. */ if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) { - SOCK_DEBUG(sk, "X.25: rejecting reverse charging request\n"); + net_dbg_ratelimited("X.25: rejecting reverse charging request\n"); return -1; } @@ -294,11 +294,11 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, int ours_in = ours->throughput & 0x0f; int ours_out = ours->throughput & 0xf0; if (!ours_in || theirs_in < ours_in) { - SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n"); + net_dbg_ratelimited("X.25: inbound throughput negotiated\n"); new->throughput = (new->throughput & 0xf0) | theirs_in; } if (!ours_out || theirs_out < ours_out) { - SOCK_DEBUG(sk, + net_dbg_ratelimited( "X.25: outbound throughput negotiated\n"); new->throughput = (new->throughput & 0x0f) | theirs_out; } @@ -306,22 +306,22 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, if (theirs.pacsize_in && theirs.pacsize_out) { if (theirs.pacsize_in < ours->pacsize_in) { - SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down\n"); + net_dbg_ratelimited("X.25: packet size inwards negotiated down\n"); new->pacsize_in = theirs.pacsize_in; } if (theirs.pacsize_out < ours->pacsize_out) { - SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down\n"); + net_dbg_ratelimited("X.25: packet size outwards negotiated down\n"); new->pacsize_out = theirs.pacsize_out; } } if (theirs.winsize_in && theirs.winsize_out) { if (theirs.winsize_in < ours->winsize_in) { - SOCK_DEBUG(sk, "X.25: window size inwards negotiated down\n"); + net_dbg_ratelimited("X.25: window size inwards negotiated down\n"); new->winsize_in = theirs.winsize_in; } if (theirs.winsize_out < ours->winsize_out) { - SOCK_DEBUG(sk, "X.25: window size outwards negotiated down\n"); + net_dbg_ratelimited("X.25: window size outwards negotiated down\n"); new->winsize_out = theirs.winsize_out; } } diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index dbc0940bf35f..f8922b0e23a4 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -72,7 +72,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return sent; } - SOCK_DEBUG(sk, "x25_output: fragment alloc" + net_dbg_ratelimited("x25_output: fragment alloc" " failed, err=%d, %d bytes " "sent\n", err, sent); return err; diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c index ab90bb08a2b4..356f756cba0d 100644 --- a/samples/bpf/cpustat_user.c +++ b/samples/bpf/cpustat_user.c @@ -66,10 +66,10 @@ static void cpu_stat_print(void) printf("CPU-%-6d ", j); for (i = 0; i < MAX_CSTATE_ENTRIES; i++) - printf("%-11ld ", data->cstate[i] / 1000000); + printf("%-11lu ", data->cstate[i] / 1000000); for (i = 0; i < MAX_PSTATE_ENTRIES; i++) - printf("%-11ld ", data->pstate[i] / 1000000); + printf("%-11lu ", data->pstate[i] / 1000000); printf("\n"); } diff --git a/security/security.c b/security/security.c index 088a79c35c26..dcb3e7014f9b 100644 --- a/security/security.c +++ b/security/security.c @@ -5167,87 +5167,29 @@ int security_bpf_prog(struct bpf_prog *prog) } /** - * security_bpf_map_create() - Check if BPF map creation is allowed - * @map: BPF map object - * @attr: BPF syscall attributes used to create BPF map - * @token: BPF token used to grant user access - * - * Do a check when the kernel creates a new BPF map. This is also the - * point where LSM blob is allocated for LSMs that need them. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) -{ - return call_int_hook(bpf_map_create, 0, map, attr, token); -} - -/** - * security_bpf_prog_load() - Check if loading of BPF program is allowed - * @prog: BPF program object - * @attr: BPF syscall attributes used to create BPF program - * @token: BPF token used to grant user access to BPF subsystem - * - * Perform an access control check when the kernel loads a BPF program and - * allocates associated BPF program object. This hook is also responsible for - * allocating any required LSM state for the BPF program. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -{ - return call_int_hook(bpf_prog_load, 0, prog, attr, token); -} - -/** - * security_bpf_token_create() - Check if creating of BPF token is allowed - * @token: BPF token object - * @attr: BPF syscall attributes used to create BPF token - * @path: path pointing to BPF FS mount point from which BPF token is created - * - * Do a check when the kernel instantiates a new BPF token object from BPF FS - * instance. This is also the point where LSM blob can be allocated for LSMs. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - return call_int_hook(bpf_token_create, 0, token, attr, path); -} - -/** - * security_bpf_token_cmd() - Check if BPF token is allowed to delegate - * requested BPF syscall command - * @token: BPF token object - * @cmd: BPF syscall command requested to be delegated by BPF token + * security_bpf_map_alloc() - Allocate a bpf map LSM blob + * @map: bpf map * - * Do a check when the kernel decides whether provided BPF token should allow - * delegation of requested BPF syscall command. + * Initialize the security field inside bpf map. * * Return: Returns 0 on success, error on failure. */ -int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +int security_bpf_map_alloc(struct bpf_map *map) { - return call_int_hook(bpf_token_cmd, 0, token, cmd); + return call_int_hook(bpf_map_alloc_security, 0, map); } /** - * security_bpf_token_capable() - Check if BPF token is allowed to delegate - * requested BPF-related capability - * @token: BPF token object - * @cap: capabilities requested to be delegated by BPF token + * security_bpf_prog_alloc() - Allocate a bpf program LSM blob + * @aux: bpf program aux info struct * - * Do a check when the kernel decides whether provided BPF token should allow - * delegation of requested BPF-related capabilities. + * Initialize the security field inside bpf program. * * Return: Returns 0 on success, error on failure. */ -int security_bpf_token_capable(const struct bpf_token *token, int cap) +int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { - return call_int_hook(bpf_token_capable, 0, token, cap); + return call_int_hook(bpf_prog_alloc_security, 0, aux); } /** @@ -5258,29 +5200,18 @@ int security_bpf_token_capable(const struct bpf_token *token, int cap) */ void security_bpf_map_free(struct bpf_map *map) { - call_void_hook(bpf_map_free, map); -} - -/** - * security_bpf_prog_free() - Free a BPF program's LSM blob - * @prog: BPF program struct - * - * Clean up the security information stored inside BPF program. - */ -void security_bpf_prog_free(struct bpf_prog *prog) -{ - call_void_hook(bpf_prog_free, prog); + call_void_hook(bpf_map_free_security, map); } /** - * security_bpf_token_free() - Free a BPF token's LSM blob - * @token: BPF token struct + * security_bpf_prog_free() - Free a bpf program's LSM blob + * @aux: bpf program aux info struct * - * Clean up the security information stored inside BPF token. + * Clean up the security information stored inside bpf prog. */ -void security_bpf_token_free(struct bpf_token *token) +void security_bpf_prog_free(struct bpf_prog_aux *aux) { - call_void_hook(bpf_token_free, token); + call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1501e95366a1..340b2bbbb2dd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1660,8 +1660,6 @@ static int inode_has_perm(const struct cred *cred, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; @@ -3056,8 +3054,6 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; sid = cred_sid(cred); @@ -3101,8 +3097,6 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; @@ -6783,8 +6777,7 @@ static int selinux_bpf_prog(struct bpf_prog *prog) BPF__PROG_RUN, NULL); } -static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) +static int selinux_bpf_map_alloc(struct bpf_map *map) { struct bpf_security_struct *bpfsec; @@ -6806,31 +6799,7 @@ static void selinux_bpf_map_free(struct bpf_map *map) kfree(bpfsec); } -static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -{ - struct bpf_security_struct *bpfsec; - - bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); - if (!bpfsec) - return -ENOMEM; - - bpfsec->sid = current_sid(); - prog->aux->security = bpfsec; - - return 0; -} - -static void selinux_bpf_prog_free(struct bpf_prog *prog) -{ - struct bpf_security_struct *bpfsec = prog->aux->security; - - prog->aux->security = NULL; - kfree(bpfsec); -} - -static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) +static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux) { struct bpf_security_struct *bpfsec; @@ -6839,16 +6808,16 @@ static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *att return -ENOMEM; bpfsec->sid = current_sid(); - token->security = bpfsec; + aux->security = bpfsec; return 0; } -static void selinux_bpf_token_free(struct bpf_token *token) +static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) { - struct bpf_security_struct *bpfsec = token->security; + struct bpf_security_struct *bpfsec = aux->security; - token->security = NULL; + aux->security = NULL; kfree(bpfsec); } #endif @@ -7204,9 +7173,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(bpf, selinux_bpf), LSM_HOOK_INIT(bpf_map, selinux_bpf_map), LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog), - LSM_HOOK_INIT(bpf_map_free, selinux_bpf_map_free), - LSM_HOOK_INIT(bpf_prog_free, selinux_bpf_prog_free), - LSM_HOOK_INIT(bpf_token_free, selinux_bpf_token_free), + LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), + LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif #ifdef CONFIG_PERF_EVENTS @@ -7263,9 +7231,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(audit_rule_init, selinux_audit_rule_init), #endif #ifdef CONFIG_BPF_SYSCALL - LSM_HOOK_INIT(bpf_map_create, selinux_bpf_map_create), - LSM_HOOK_INIT(bpf_prog_load, selinux_bpf_prog_load), - LSM_HOOK_INIT(bpf_token_create, selinux_bpf_token_create), + LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc), + LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc), #endif #ifdef CONFIG_PERF_EVENTS LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 1cde2a69bdb4..78cee53fee02 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1993,7 +1993,10 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), + SND_PCI_QUIRK(0x1043, 0x86ae, "ASUS", 1), /* Z170 PRO */ + SND_PCI_QUIRK(0x1043, 0x86c7, "ASUS", 1), /* Z170M PLUS */ SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2060, "Intel NUC5CPYB", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0377912e9264..e45d4c405f8f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9795,6 +9795,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x84ae, "HP 15-db0403ng", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index fb802802939e..63a90c7e8976 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -455,9 +455,9 @@ static int tas2781_save_calibration(struct tasdevice_priv *tas_priv) status = efi.get_variable(efi_name, &efi_guid, &attr, &tas_priv->cali_data.total_sz, tas_priv->cali_data.data); - if (status != EFI_SUCCESS) - return -EINVAL; } + if (status != EFI_SUCCESS) + return -EINVAL; tmp_val = (unsigned int *)tas_priv->cali_data.data; @@ -550,11 +550,6 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) tas2781_save_calibration(tas_priv); out: - if (tas_priv->fw_state == TASDEVICE_DSP_FW_FAIL) { - /*If DSP FW fail, kcontrol won't be created */ - tasdevice_config_info_remove(tas_priv); - tasdevice_dsp_remove(tas_priv); - } mutex_unlock(&tas_priv->codec_lock); if (fmw) release_firmware(fmw); @@ -612,9 +607,13 @@ static void tas2781_hda_unbind(struct device *dev, { struct tasdevice_priv *tas_priv = dev_get_drvdata(dev); struct hda_component *comps = master_data; + comps = &comps[tas_priv->index]; - if (comps[tas_priv->index].dev == dev) - memset(&comps[tas_priv->index], 0, sizeof(*comps)); + if (comps->dev == dev) { + comps->dev = NULL; + memset(comps->name, 0, sizeof(comps->name)); + comps->playback_hook = NULL; + } tasdevice_config_info_remove(tas_priv); tasdevice_dsp_remove(tas_priv); @@ -675,14 +674,14 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) pm_runtime_put_autosuspend(tas_priv->dev); + tas2781_reset(tas_priv); + ret = component_add(tas_priv->dev, &tas2781_hda_comp_ops); if (ret) { dev_err(tas_priv->dev, "Register component failed: %d\n", ret); pm_runtime_disable(tas_priv->dev); - goto err; } - tas2781_reset(tas_priv); err: if (ret) tas2781_hda_remove(&clt->dev); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0545201b55f..7f24d898efbb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. - * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h index 3faee0199a9b..bd4b227ab4ba 100644 --- a/tools/include/uapi/linux/pkt_cls.h +++ b/tools/include/uapi/linux/pkt_cls.h @@ -204,37 +204,6 @@ struct tc_u32_pcnt { #define TC_U32_MAXDEPTH 8 - -/* RSVP filter */ - -enum { - TCA_RSVP_UNSPEC, - TCA_RSVP_CLASSID, - TCA_RSVP_DST, - TCA_RSVP_SRC, - TCA_RSVP_PINFO, - TCA_RSVP_POLICE, - TCA_RSVP_ACT, - __TCA_RSVP_MAX -}; - -#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) - -struct tc_rsvp_gpi { - __u32 key; - __u32 mask; - int offset; -}; - -struct tc_rsvp_pinfo { - struct tc_rsvp_gpi dpi; - struct tc_rsvp_gpi spi; - __u8 protocol; - __u8 tunnelid; - __u8 tunnelhdr; - __u8 pad; -}; - /* ROUTE filter */ enum { @@ -265,22 +234,6 @@ enum { #define TCA_FW_MAX (__TCA_FW_MAX - 1) -/* TC index filter */ - -enum { - TCA_TCINDEX_UNSPEC, - TCA_TCINDEX_HASH, - TCA_TCINDEX_MASK, - TCA_TCINDEX_SHIFT, - TCA_TCINDEX_FALL_THROUGH, - TCA_TCINDEX_CLASSID, - TCA_TCINDEX_POLICE, - TCA_TCINDEX_ACT, - __TCA_TCINDEX_MAX -}; - -#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) - /* Flow filter */ enum { diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 5c903abc9fa5..587481a19433 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -457,115 +457,6 @@ enum { #define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt { - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt { - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl { - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u16 pad; - __u32 penalty; -}; - -struct tc_cbq_police { - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt { - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats { - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum { - TCA_CBQ_UNSPEC, - TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, - __TCA_CBQ_MAX, -}; - -#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) - -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE, - __TCA_DSMARK_MAX, -}; - -#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) - -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ - __TCA_ATM_MAX, -}; - -#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) - /* Network emulator */ enum { diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b6619199a706..2d0c282c8588 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o features.o + usdt.o zip.o elf.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0ad8e532b3cf..9dc9625651dc 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(int token_fd) +int probe_memcg_account(void) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,7 +120,6 @@ int probe_memcg_account(int token_fd) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); - attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { @@ -147,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -170,7 +169,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); union bpf_attr attr; int fd; @@ -182,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) + if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -199,8 +198,6 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); - attr.map_token_fd = OPTS_GET(opts, token_fd, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -235,7 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -264,9 +261,8 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); - attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) + if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -1186,7 +1182,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1211,8 +1207,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; - attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); - /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading @@ -1293,20 +1287,3 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } - -int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) -{ - const size_t attr_sz = offsetofend(union bpf_attr, token_create); - union bpf_attr attr; - int fd; - - if (!OPTS_VALID(opts, bpf_token_create_opts)) - return libbpf_err(-EINVAL); - - memset(&attr, 0, attr_sz); - attr.token_create.bpffs_fd = bpffs_fd; - attr.token_create.flags = OPTS_GET(opts, flags, 0); - - fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); - return libbpf_err_errno(fd); -} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 991b86bfe7e4..d0f53772bdc0 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,11 +51,8 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; - - __u32 token_fd; - size_t :0; }; -#define bpf_map_create_opts__last_field token_fd +#define bpf_map_create_opts__last_field map_ifindex LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -105,10 +102,9 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; - __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field token_fd +#define bpf_prog_load_opts__last_field log_true_size LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, @@ -134,10 +130,9 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; - __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field token_fd +#define bpf_btf_load_opts__last_field log_true_size LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); @@ -645,30 +640,6 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); -struct bpf_token_create_opts { - size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 flags; - size_t :0; -}; -#define bpf_token_create_opts__last_field flags - -/** - * @brief **bpf_token_create()** creates a new instance of BPF token derived - * from specified BPF FS mount point. - * - * BPF token created with this API can be passed to bpf() syscall for - * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. - * - * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token - * instance. - * @param opts optional BPF token creation options, can be NULL - * - * @return BPF token FD > 0, on success; negative error code, otherwise (errno - * is also set to the error code) - */ -LIBBPF_API int bpf_token_create(int bpffs_fd, - struct bpf_token_create_opts *opts); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 77ceea575dc7..2324cc42b017 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -188,6 +188,9 @@ enum libbpf_tristate { !!sym; \ }) +#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) +#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 63033c334320..ee95fd379d4d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,9 +1317,7 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, - char *log_buf, size_t log_sz, __u32 log_level, - int token_fd) +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1369,7 +1367,6 @@ retry_load: opts.log_level = log_level; } - opts.token_fd = token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1397,7 +1394,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index c92e02394159..b02faec748a5 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,6 +11,8 @@ #include "libbpf_internal.h" #include "str_error.h" +#define STRERR_BUFSIZE 128 + /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c deleted file mode 100644 index ce98a334be21..000000000000 --- a/tools/lib/bpf/features.c +++ /dev/null @@ -1,478 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#include <linux/kernel.h> -#include <linux/filter.h> -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_common.h" -#include "libbpf_internal.h" -#include "str_error.h" - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64)(unsigned long)ptr; -} - -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(int token_fd) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - attr.prog_token_fd = token_fd; - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(int token_fd) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(int token_fd) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_func(int token_fd) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_func_global(int token_fd) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_datasec(int token_fd) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_float(int token_fd) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_decl_tag(int token_fd) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_type_tag(int token_fd) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_array_mmap(int token_fd) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .map_flags = BPF_F_MMAPABLE, - .token_fd = token_fd, - ); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .token_fd = token_fd, - ); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_prog_bind_map(int token_fd) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(int token_fd) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(int token_fd) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), &opts); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - .token_fd = token_fd, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. */ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(int token_fd) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(int token_fd) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -typedef int (*feature_probe_fn)(int /* token_fd */); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(cache->token_fd); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } - } - - return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; -} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b5ff9508e18..ebcfb2147fbd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,8 +59,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" - #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -695,10 +693,6 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct kern_feature_cache *feat_cache; - char *token_path; - int token_fd; - char path[]; }; @@ -2198,7 +2192,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = BPF_FS_DEFAULT_PATH; + path = "/sys/fs/bpf"; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3285,7 +3279,7 @@ skip_exception_cb: } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0, obj->token_fd); + obj->log_level ? 1 : 0); } if (sanitize) { if (!err) { @@ -4361,6 +4355,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); @@ -4608,63 +4604,6 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } -static int bpf_object_prepare_token(struct bpf_object *obj) -{ - const char *bpffs_path; - int bpffs_fd = -1, token_fd, err; - bool mandatory; - enum libbpf_print_level level; - - /* token is already set up */ - if (obj->token_fd > 0) - return 0; - /* token is explicitly prevented */ - if (obj->token_fd < 0) { - pr_debug("object '%s': token is prevented, skipping...\n", obj->name); - /* reset to zero to avoid extra checks during map_create and prog_load steps */ - obj->token_fd = 0; - return 0; - } - - mandatory = obj->token_path != NULL; - level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; - - bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; - bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); - if (bpffs_fd < 0) { - err = -errno; - __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", - obj->name, err, bpffs_path, - mandatory ? "" : ", skipping optional step..."); - return mandatory ? err : 0; - } - - token_fd = bpf_token_create(bpffs_fd, 0); - close(bpffs_fd); - if (token_fd < 0) { - if (!mandatory && token_fd == -ENOENT) { - pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", - obj->name, bpffs_path); - return 0; - } - __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", - obj->name, token_fd, bpffs_path, - mandatory ? "" : ", skipping optional step..."); - return mandatory ? token_fd : 0; - } - - obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); - if (!obj->feat_cache) { - close(token_fd); - return -ENOMEM; - } - - obj->token_fd = token_fd; - obj->feat_cache->token_fd = token_fd; - - return 0; -} - static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4674,7 +4613,6 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4684,9 +4622,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4701,18 +4639,462 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_syscall_wrapper(void); + +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; + enum kern_feature_result res; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ return true; - if (obj->token_fd) - return feat_supported(obj->feat_cache, feat_id); + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(feat->res, FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(feat->res, FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(feat->res, FEAT_MISSING); + } + } - return feat_supported(NULL, feat_id); + return READ_ONCE(feat->res) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -4831,7 +5213,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -6667,7 +7048,6 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7129,10 +7509,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path, *token_path; + const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_object *obj; char tmp_name[64]; - int err, token_fd; + int err; char *log_buf; size_t log_size; __u32 log_level; @@ -7166,28 +7546,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); - token_path = OPTS_GET(opts, bpf_token_path, NULL); - token_fd = OPTS_GET(opts, bpf_token_fd, -1); - /* non-empty token path can't be combined with invalid token FD */ - if (token_path && token_path[0] != '\0' && token_fd < 0) - return ERR_PTR(-EINVAL); - /* empty token path can't be combined with valid token FD */ - if (token_path && token_path[0] == '\0' && token_fd > 0) - return ERR_PTR(-EINVAL); - /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, - * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as - * bpf_token_path option - */ - if (token_fd == 0 && !token_path) - token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); - /* empty token_path is equivalent to invalid token_fd */ - if (token_path && token_path[0] == '\0') { - token_path = NULL; - token_fd = -1; - } - if (token_path && strlen(token_path) >= PATH_MAX) - return ERR_PTR(-ENAMETOOLONG); - obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7196,19 +7554,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; - obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); - if (token_fd > 0 && obj->token_fd < 0) { - err = -errno; - goto out; - } - if (token_path) { - obj->token_path = strdup(token_path); - if (!obj->token_path) { - err = -ENOMEM; - goto out; - } - } - btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7719,8 +8064,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object_prepare_token(obj); - err = err ? : bpf_object__probe_loading(obj); + err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); @@ -8257,11 +8601,6 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); - zfree(&obj->feat_cache); - zfree(&obj->token_path); - if (obj->token_fd > 0) - close(obj->token_fd); - free(obj); } @@ -10275,7 +10614,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(int token_fd) +static int probe_kern_syscall_wrapper(void) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 916904bd2a7a..6cd9c501624f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,45 +177,10 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; - /* FD of a BPF token instantiated by user through bpf_token_create() - * API. BPF object will keep dup()'ed FD internally, so passed token - * FD can be closed after BPF object/skeleton open step. - * - * Setting bpf_token_fd to negative value disables libbpf's automatic - * attempt to create BPF token from default BPF FS mount point - * (/sys/fs/bpf), in case this default behavior is undesirable. - * - * If bpf_token_path and bpf_token_fd are not specified, libbpf will - * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will - * be taken as a value of bpf_token_path option and will force libbpf - * to either create BPF token from provided custom BPF FS path, or - * will disable implicit BPF token creation, if envvar value is an - * empty string. - * - * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. Either of them overrides - * LIBBPF_BPF_TOKEN_PATH envvar. - */ - int bpf_token_fd; - /* Path to BPF FS mount point to derive BPF token from. - * - * Created BPF token will be used for all bpf() syscall operations - * that accept BPF token (e.g., map creation, BTF and program loads, - * etc) automatically within instantiated BPF object. - * - * Setting bpf_token_path option to empty string disables libbpf's - * automatic attempt to create BPF token from default BPF FS mount - * point (/sys/fs/bpf), in case this default behavior is undesirable. - * - * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. Either of them overrides - * LIBBPF_BPF_TOKEN_PATH envvar. - */ - const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field bpf_token_path +#define bpf_object_open_opts__last_field kernel_log_level /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index df7657b65c47..91c5aef7dae7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -401,7 +401,6 @@ LIBBPF_1.3.0 { bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; - bpf_token_create; ring__avail_data_size; ring__consume; ring__consumer_pos; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4cda32298c49..b5d334754e5d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,32 +360,15 @@ enum kern_feature_id { __FEAT_CNT, }; -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; - int token_fd; -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); +int probe_memcg_account(void); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); - -int probe_kern_syscall_wrapper(int token_fd); -int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd); -int btf_load_into_kernel(struct btf *btf, - char *log_buf, size_t log_sz, __u32 log_level, - int token_fd); + const char *str_sec, size_t str_len); +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -549,17 +532,6 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } -/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. - * Original FD is not closed or altered in any other way. - * Preserves original FD value, if it's invalid (negative). - */ -static inline int dup_good_fd(int fd) -{ - if (fd < 0) - return fd; - return fcntl(fd, F_DUPFD_CLOEXEC, 3); -} - /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -571,7 +543,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = dup_good_fd(fd); + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); saved_errno = errno; close(old_fd); errno = saved_errno; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 8e7437006639..9c4db90b92b6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,8 +219,7 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd) + const char *str_sec, size_t str_len) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -230,7 +229,6 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; - LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -243,7 +241,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); + btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); free(raw_btf); return btf_fd; @@ -273,7 +271,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), 0); + strs, sizeof(strs)); } static int probe_map_create(enum bpf_map_type map_type) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 52a2901e8bd0..16bca56002ab 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -719,6 +719,9 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; } + if (is_dwarf_sec_name(sec->sec_name)) + continue; + if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) { pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n", sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 626d7ffb03d6..a139334d57b6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,8 +2,5 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H -#define STRERR_BUFSIZE 128 - char *libbpf_strerror_r(int err, char *dst, int len); - #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 649ebdef9c3f..1685d7ea6a9f 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -6,7 +6,6 @@ * * Yes, this is unfortunate. A better solution is in the works. */ -NORETURN(__invalid_creds) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 90f3c9802ffb..95dc58b94178 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -62,5 +62,6 @@ cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o +cxl_core-y += cxl_core_exports.o obj-m += test/ diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c new file mode 100644 index 000000000000..077e6883921d --- /dev/null +++ b/tools/testing/cxl/cxl_core_exports.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include "cxl.h" + +/* Exporting of cxl_core symbols that are only used by cxl_test */ +EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, CXL); diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index b88546299902..f4e517a0c774 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -669,10 +669,11 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) return 0; dev_dbg(&port->dev, "%s commit\n", dev_name(&cxld->dev)); - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f0c854d6511c..81f094e7f0f7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -156,12 +156,10 @@ ifneq ($(KBUILD_OUTPUT),) abs_objtree := $(realpath $(abs_objtree)) BUILD := $(abs_objtree)/kselftest KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include - KHDR_DIR := ${abs_objtree}/usr/include else BUILD := $(CURDIR) abs_srctree := $(shell cd $(top_srcdir) && pwd) KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include - KHDR_DIR := ${abs_srctree}/usr/include DEFAULT_INSTALL_HDR_PATH := 1 endif @@ -175,7 +173,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -all: kernel_header_files +all: @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -186,23 +184,6 @@ all: kernel_header_files ret=$$((ret * $$?)); \ done; exit $$ret; -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files - run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 9146d3f414d2..926ee822143e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -335,6 +335,7 @@ static void htab_mem_report_final(struct bench_res res[], int res_cnt) " peak memory usage %7.2lfMiB\n", loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + close(ctx.fd); cleanup_cgroup_environment(); } diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index f7bfb2b09c82..b946088017f1 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 8ec73fdfcdab..f29fc789c14b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -348,7 +348,8 @@ static void test_func_sockmap_update(void) } static void test_obj_load_failure_common(const char *obj_file, - const char *target_obj_file) + const char *target_obj_file, + const char *exp_msg) { /* * standalone test that asserts failure to load freplace prog @@ -356,6 +357,7 @@ static void test_obj_load_failure_common(const char *obj_file, */ struct bpf_object *obj = NULL, *pkt_obj; struct bpf_program *prog; + char log_buf[64 * 1024]; int err, pkt_fd; __u32 duration = 0; @@ -374,11 +376,21 @@ static void test_obj_load_failure_common(const char *obj_file, err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "set_attach_target"); + log_buf[0] = '\0'; + if (exp_msg) + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + if (env.verbosity > VERBOSE_NONE) + bpf_program__set_log_level(prog, 2); + /* It should fail to load the program */ err = bpf_object__load(obj); + if (env.verbosity > VERBOSE_NONE && exp_msg) /* we overtook log */ + printf("VERIFIER LOG:\n================\n%s\n================\n", log_buf); if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) goto close_prog; + if (exp_msg) + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "fail_msg"); close_prog: bpf_object__close(obj); bpf_object__close(pkt_obj); @@ -388,14 +400,24 @@ static void test_func_replace_return_code(void) { /* test invalid return code in the replaced program */ test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o", - "./connect4_prog.bpf.o"); + "./connect4_prog.bpf.o", NULL); } static void test_func_map_prog_compatibility(void) { /* test with spin lock map value in the replaced program */ test_obj_load_failure_common("./freplace_attach_probe.bpf.o", - "./test_attach_probe.bpf.o"); + "./test_attach_probe.bpf.o", NULL); +} + +static void test_func_replace_unreliable(void) +{ + /* freplace'ing unreliable main prog should fail with error + * "Cannot replace static functions" + */ + test_obj_load_failure_common("freplace_unreliable_prog.bpf.o", + "./verifier_btf_unreliable_prog.bpf.o", + "Cannot replace static functions"); } static void test_func_replace_global_func(void) @@ -563,6 +585,8 @@ void serial_test_fexit_bpf2bpf(void) test_func_replace_return_code(); if (test__start_subtest("func_map_prog_compatibility")) test_func_map_prog_compatibility(); + if (test__start_subtest("func_replace_unreliable")) + test_func_replace_unreliable(); if (test__start_subtest("func_replace_multi")) test_func_replace_multi(); if (test__start_subtest("fmod_ret_freplace")) diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 4ed46ed58a7b..9f766ddd946a 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,8 +30,6 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; - if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) - continue; if (!test__start_subtest(prog_type_name)) continue; @@ -70,8 +68,6 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; - if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) - continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 62ea855ec4d0..eb34d612d6f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,9 +132,6 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; - if (map_type == __MAX_BPF_MAP_TYPE) - continue; - map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); @@ -189,9 +186,6 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; - if (prog_type == __MAX_BPF_PROG_TYPE) - continue; - prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index effd78b2a657..7a3fa2ff567b 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -169,9 +169,9 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_none")) bad_core_relo(0, TRUNC_NONE /* full buf */); if (test__start_subtest("bad_core_relo_trunc_partial")) - bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); + bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 0c9abd279e18..820d0bcfc474 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -590,12 +590,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); break; case OP_NE: - /* generic case, can't derive more information */ - *newx = range(t, x.a, x.b); - *newy = range(t, y.a, y.b); - break; - - /* below extended logic is not supported by verifier just yet */ + /* below logic is supported by the verifier now */ if (x.a == x.b && x.a == y.a) { /* X is a constant matching left side of Y */ *newx = range(t, x.a, x.b); @@ -2097,10 +2092,22 @@ static struct subtest_case crafted_cases[] = { {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, - {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, - {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, - {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, - {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U64, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}}, + + /* edge overlap testings for BPF_NE */ + {U64, U64, {0, U64_MAX}, {U64_MAX, U64_MAX}}, + {U64, U64, {0, U64_MAX}, {0, 0}}, + {S64, U64, {S64_MIN, 0}, {S64_MIN, S64_MIN}}, + {S64, U64, {S64_MIN, 0}, {0, 0}}, + {S64, U64, {S64_MIN, S64_MAX}, {S64_MAX, S64_MAX}}, + {U32, U32, {0, U32_MAX}, {0, 0}}, + {U32, U32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + {S32, U32, {(u32)S32_MIN, 0}, {0, 0}}, + {S32, U32, {(u32)S32_MIN, 0}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, S32_MAX}, {S32_MAX, S32_MAX}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index f75f84d0b3d7..7c2241fae19a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -524,6 +524,37 @@ out: test_sockmap_pass_prog__destroy(pass); } +static void test_sockmap_unconnected_unix(void) +{ + int err, map, stream = 0, dgram = 0, zero = 0; + struct test_sockmap_pass_prog *skel; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + stream = xsocket(AF_UNIX, SOCK_STREAM, 0); + if (stream < 0) + return; + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + close(stream); + return; + } + + err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); + ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + + close(stream); + close(dgram); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -566,4 +597,7 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); + + if (test__start_subtest("sockmap unconnected af_unix")) + test_sockmap_unconnected_unix(); } diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c deleted file mode 100644 index b5dce630e0e1..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ /dev/null @@ -1,1031 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#define _GNU_SOURCE -#include <test_progs.h> -#include <bpf/btf.h> -#include "cap_helpers.h" -#include <fcntl.h> -#include <sched.h> -#include <signal.h> -#include <unistd.h> -#include <linux/filter.h> -#include <linux/unistd.h> -#include <linux/mount.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/syscall.h> -#include <sys/un.h> -#include "priv_map.skel.h" -#include "priv_prog.skel.h" -#include "dummy_st_ops_success.skel.h" - -static inline int sys_mount(const char *dev_name, const char *dir_name, - const char *type, unsigned long flags, - const void *data) -{ - return syscall(__NR_mount, dev_name, dir_name, type, flags, data); -} - -static inline int sys_fsopen(const char *fsname, unsigned flags) -{ - return syscall(__NR_fsopen, fsname, flags); -} - -static inline int sys_fspick(int dfd, const char *path, unsigned flags) -{ - return syscall(__NR_fspick, dfd, path, flags); -} - -static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) -{ - return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); -} - -static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) -{ - return syscall(__NR_fsmount, fs_fd, flags, ms_flags); -} - -static inline int sys_move_mount(int from_dfd, const char *from_path, - int to_dfd, const char *to_path, - unsigned flags) -{ - return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); -} - -static int drop_priv_caps(__u64 *old_caps) -{ - return cap_disable_effective((1ULL << CAP_BPF) | - (1ULL << CAP_PERFMON) | - (1ULL << CAP_NET_ADMIN) | - (1ULL << CAP_SYS_ADMIN), old_caps); -} - -static int restore_priv_caps(__u64 old_caps) -{ - return cap_enable_effective(old_caps, NULL); -} - -static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) -{ - char buf[32]; - int err; - - if (!mask_str) { - if (mask == ~0ULL) { - mask_str = "any"; - } else { - snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); - mask_str = buf; - } - } - - err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, - mask_str, 0); - if (err < 0) - err = -errno; - return err; -} - -#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) - -struct bpffs_opts { - __u64 cmds; - __u64 maps; - __u64 progs; - __u64 attachs; - const char *cmds_str; - const char *maps_str; - const char *progs_str; - const char *attachs_str; -}; - -static int create_bpffs_fd(void) -{ - int fs_fd; - - /* create VFS context */ - fs_fd = sys_fsopen("bpf", 0); - ASSERT_GE(fs_fd, 0, "fs_fd"); - - return fs_fd; -} - -static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) -{ - int mnt_fd, err; - - /* set up token delegation mount options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); - if (!ASSERT_OK(err, "fs_cfg_cmds")) - return err; - err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); - if (!ASSERT_OK(err, "fs_cfg_maps")) - return err; - err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); - if (!ASSERT_OK(err, "fs_cfg_progs")) - return err; - err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); - if (!ASSERT_OK(err, "fs_cfg_attachs")) - return err; - - /* instantiate FS object */ - err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); - if (err < 0) - return -errno; - - /* create O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; -} - -/* send FD over Unix domain (AF_UNIX) socket */ -static int sendfd(int sockfd, int fd) -{ - struct msghdr msg = {}; - struct cmsghdr *cmsg; - int fds[1] = { fd }, err; - char iobuf[1]; - struct iovec io = { - .iov_base = iobuf, - .iov_len = sizeof(iobuf), - }; - union { - char buf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr align; - } u; - - msg.msg_iov = &io; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); - memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); - - err = sendmsg(sockfd, &msg, 0); - if (err < 0) - err = -errno; - if (!ASSERT_EQ(err, 1, "sendmsg")) - return -EINVAL; - - return 0; -} - -/* receive FD over Unix domain (AF_UNIX) socket */ -static int recvfd(int sockfd, int *fd) -{ - struct msghdr msg = {}; - struct cmsghdr *cmsg; - int fds[1], err; - char iobuf[1]; - struct iovec io = { - .iov_base = iobuf, - .iov_len = sizeof(iobuf), - }; - union { - char buf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr align; - } u; - - msg.msg_iov = &io; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - err = recvmsg(sockfd, &msg, 0); - if (err < 0) - err = -errno; - if (!ASSERT_EQ(err, 1, "recvmsg")) - return -EINVAL; - - cmsg = CMSG_FIRSTHDR(&msg); - if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || - !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || - !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || - !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) - return -EINVAL; - - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - *fd = fds[0]; - - return 0; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static int write_file(const char *path, const void *buf, size_t count) -{ - int fd; - ssize_t ret; - - fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); - if (fd < 0) - return -1; - - ret = write_nointr(fd, buf, count); - close(fd); - if (ret < 0 || (size_t)ret != count) - return -1; - - return 0; -} - -static int create_and_enter_userns(void) -{ - uid_t uid; - gid_t gid; - char map[100]; - - uid = getuid(); - gid = getgid(); - - if (unshare(CLONE_NEWUSER)) - return -1; - - if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && - errno != ENOENT) - return -1; - - snprintf(map, sizeof(map), "0 %d 1", uid); - if (write_file("/proc/self/uid_map", map, strlen(map))) - return -1; - - - snprintf(map, sizeof(map), "0 %d 1", gid); - if (write_file("/proc/self/gid_map", map, strlen(map))) - return -1; - - if (setgid(0)) - return -1; - - if (setuid(0)) - return -1; - - return 0; -} - -typedef int (*child_callback_fn)(int); - -static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) -{ - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; - - /* setup userns with root mappings */ - err = create_and_enter_userns(); - if (!ASSERT_OK(err, "create_and_enter_userns")) - goto cleanup; - - /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ - err = unshare(CLONE_NEWNS); - if (!ASSERT_OK(err, "create_mountns")) - goto cleanup; - - err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); - if (!ASSERT_OK(err, "remount_root")) - goto cleanup; - - fs_fd = create_bpffs_fd(); - if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { - err = -EINVAL; - goto cleanup; - } - - /* ensure unprivileged child cannot set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); - err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); - err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); - err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); - - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, fs_fd); - if (!ASSERT_OK(err, "send_fs_fd")) - goto cleanup; - zclose(fs_fd); - - /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent - */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) - goto cleanup; - - /* try to fspick() BPF FS and try to add some delegation options */ - fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); - if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { - err = -EINVAL; - goto cleanup; - } - - /* ensure unprivileged child cannot reconfigure to set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - zclose(fs_fd); - - bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); - if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { - err = -EINVAL; - goto cleanup; - } - - /* do custom test logic with customly set up BPF FS instance */ - err = callback(bpffs_fd); - if (!ASSERT_OK(err, "test_callback")) - goto cleanup; - - err = 0; -cleanup: - zclose(sock_fd); - zclose(mnt_fd); - zclose(fs_fd); - zclose(bpffs_fd); - - exit(-err); -} - -static int wait_for_pid(pid_t pid) -{ - int status, ret; - -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == EINTR) - goto again; - - return -1; - } - - if (!WIFEXITED(status)) - return -1; - - return WEXITSTATUS(status); -} - -static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) -{ - int fs_fd = -1, mnt_fd = -1, err; - - err = recvfd(sock_fd, &fs_fd); - if (!ASSERT_OK(err, "recv_bpffs_fd")) - goto cleanup; - - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { - err = -EINVAL; - goto cleanup; - } - zclose(fs_fd); - - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if (!ASSERT_OK(err, "send_mnt_fd")) - goto cleanup; - zclose(mnt_fd); - - err = wait_for_pid(child_pid); - ASSERT_OK(err, "waitpid_child"); - -cleanup: - zclose(sock_fd); - zclose(fs_fd); - zclose(mnt_fd); - - if (child_pid > 0) - (void)kill(child_pid, SIGKILL); -} - -static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) -{ - int sock_fds[2] = { -1, -1 }; - int child_pid = 0, err; - - err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); - if (!ASSERT_OK(err, "socketpair")) - goto cleanup; - - child_pid = fork(); - if (!ASSERT_GE(child_pid, 0, "fork")) - goto cleanup; - - if (child_pid == 0) { - zclose(sock_fds[0]); - return child(sock_fds[1], bpffs_opts, cb); - - } else { - zclose(sock_fds[1]); - return parent(child_pid, bpffs_opts, sock_fds[0]); - } - -cleanup: - zclose(sock_fds[0]); - zclose(sock_fds[1]); - if (child_pid > 0) - (void)kill(child_pid, SIGKILL); -} - -static int userns_map_create(int mnt_fd) -{ - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int err, token_fd = -1, map_fd = -1; - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* while inside non-init userns, we need both a BPF token *and* - * CAP_BPF inside current userns to create privileged map; let's test - * that neither BPF token alone nor namespaced CAP_BPF is sufficient - */ - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* no token, no CAP_BPF -> fail */ - map_opts.token_fd = 0; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* token without CAP_BPF -> fail */ - map_opts.token_fd = token_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ - err = restore_priv_caps(old_caps); - if (!ASSERT_OK(err, "restore_caps")) - goto cleanup; - - /* CAP_BPF without token -> fail */ - map_opts.token_fd = 0; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* finally, namespaced CAP_BPF + token -> success */ - map_opts.token_fd = token_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { - err = -EINVAL; - goto cleanup; - } - -cleanup: - zclose(token_fd); - zclose(map_fd); - return err; -} - -static int userns_btf_load(int mnt_fd) -{ - LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); - int err, token_fd = -1, btf_fd = -1; - const void *raw_btf_data; - struct btf *btf = NULL; - __u32 raw_btf_size; - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* while inside non-init userns, we need both a BPF token *and* - * CAP_BPF inside current userns to create privileged map; let's test - * that neither BPF token alone nor namespaced CAP_BPF is sufficient - */ - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* setup a trivial BTF data to load to the kernel */ - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "empty_btf")) - goto cleanup; - - ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); - - raw_btf_data = btf__raw_data(btf, &raw_btf_size); - if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) - goto cleanup; - - /* no token + no CAP_BPF -> failure */ - btf_opts.token_fd = 0; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) - goto cleanup; - - /* token + no CAP_BPF -> failure */ - btf_opts.token_fd = token_fd; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) - goto cleanup; - - /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ - err = restore_priv_caps(old_caps); - if (!ASSERT_OK(err, "restore_caps")) - goto cleanup; - - /* token + CAP_BPF -> success */ - btf_opts.token_fd = token_fd; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) - goto cleanup; - - err = 0; -cleanup: - btf__free(btf); - zclose(btf_fd); - zclose(token_fd); - return err; -} - -static int userns_prog_load(int mnt_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); - int err, token_fd = -1, prog_fd = -1; - struct bpf_insn insns[] = { - /* bpf_jiffies64() requires CAP_BPF */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), - /* bpf_get_current_task() requires CAP_PERFMON */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), - /* r0 = 0; exit; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - size_t insn_cnt = ARRAY_SIZE(insns); - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* validate we can successfully load BPF program with token; this - * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) - * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have - * BPF token wired properly in a bunch of places in the kernel - */ - prog_opts.token_fd = token_fd; - prog_opts.expected_attach_type = BPF_XDP; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { - err = -EPERM; - goto cleanup; - } - - /* no token + caps -> failure */ - prog_opts.token_fd = 0; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* no caps + token -> failure */ - prog_opts.token_fd = token_fd; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - /* no caps + no token -> definitely a failure */ - prog_opts.token_fd = 0; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - err = 0; -cleanup: - zclose(prog_fd); - zclose(token_fd); - return err; -} - -static int userns_obj_priv_map(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct priv_map *skel; - int err, token_fd; - - skel = priv_map__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - priv_map__destroy(skel); - return -EINVAL; - } - - /* use bpf_token_path to provide BPF FS path */ - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = priv_map__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = priv_map__load(skel); - priv_map__destroy(skel); - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - - /* create token and pass it through bpf_token_fd */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "create_token")) - return -EINVAL; - - opts.bpf_token_path = NULL; - opts.bpf_token_fd = token_fd; - skel = priv_map__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) - return -EINVAL; - - /* we can close our token FD, bpf_object owns dup()'ed FD now */ - close(token_fd); - - err = priv_map__load(skel); - priv_map__destroy(skel); - if (!ASSERT_OK(err, "obj_token_fd_load")) - return -EINVAL; - - return 0; -} - -static int userns_obj_priv_prog(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct priv_prog *skel; - int err; - - skel = priv_prog__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - priv_prog__destroy(skel); - return -EINVAL; - } - - /* use bpf_token_path to provide BPF FS path */ - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = priv_prog__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = priv_prog__load(skel); - priv_prog__destroy(skel); - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - - return 0; -} - -/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, - * which should cause struct_ops application to fail, as BTF won't be uploaded - * into the kernel, even if STRUCT_OPS programs themselves are allowed - */ -static int validate_struct_ops_load(int mnt_fd, bool expect_success) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct dummy_st_ops_success *skel; - int err; - - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (expect_success) { - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - } else /* expect failure */ { - if (!ASSERT_ERR(err, "obj_token_path_load")) - return -EINVAL; - } - - return 0; -} - -static int userns_obj_priv_btf_fail(int mnt_fd) -{ - return validate_struct_ops_load(mnt_fd, false /* should fail */); -} - -static int userns_obj_priv_btf_success(int mnt_fd) -{ - return validate_struct_ops_load(mnt_fd, true /* should succeed */); -} - -#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" -#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" - -static int userns_obj_priv_implicit_token(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct dummy_st_ops_success *skel; - int err; - - /* before we mount BPF FS with token delegation, struct_ops skeleton - * should fail to load - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - - /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF - * token automatically and implicitly - */ - err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); - if (!ASSERT_OK(err, "move_mount_bpffs")) - return -EINVAL; - - /* disable implicit BPF token creation by setting - * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail - */ - err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); - if (!ASSERT_OK(err, "setenv_token_path")) - return -EINVAL; - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { - unsetenv(TOKEN_ENVVAR); - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - unsetenv(TOKEN_ENVVAR); - - /* now the same struct_ops skeleton should succeed thanks to libppf - * creating BPF token from /sys/fs/bpf mount point - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) - return -EINVAL; - - dummy_st_ops_success__destroy(skel); - - /* now disable implicit token through empty bpf_token_path, should fail */ - opts.bpf_token_path = ""; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_empty_token_path_load")) - return -EINVAL; - - /* now disable implicit token through negative bpf_token_fd, should fail */ - opts.bpf_token_path = NULL; - opts.bpf_token_fd = -1; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) - return -EINVAL; - - return 0; -} - -static int userns_obj_priv_implicit_token_envvar(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct dummy_st_ops_success *skel; - int err; - - /* before we mount BPF FS with token delegation, struct_ops skeleton - * should fail to load - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - - /* mount custom BPF FS over custom location, so libbpf can't create - * BPF token implicitly, unless pointed to it through - * LIBBPF_BPF_TOKEN_PATH envvar - */ - rmdir(TOKEN_BPFFS_CUSTOM); - if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) - goto err_out; - err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); - if (!ASSERT_OK(err, "move_mount_bpffs")) - goto err_out; - - /* even though we have BPF FS with delegation, it's not at default - * /sys/fs/bpf location, so we still fail to load until envvar is set up - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { - dummy_st_ops_success__destroy(skel); - goto err_out; - } - - err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); - if (!ASSERT_OK(err, "setenv_token_path")) - goto err_out; - - /* now the same struct_ops skeleton should succeed thanks to libppf - * creating BPF token from custom mount point - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) - goto err_out; - - dummy_st_ops_success__destroy(skel); - - /* now disable implicit token through empty bpf_token_path, envvar - * will be ignored, should fail - */ - opts.bpf_token_path = ""; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) - goto err_out; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_empty_token_path_load")) - goto err_out; - - /* now disable implicit token through negative bpf_token_fd, envvar - * will be ignored, should fail - */ - opts.bpf_token_path = NULL; - opts.bpf_token_fd = -1; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) - goto err_out; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) - goto err_out; - - rmdir(TOKEN_BPFFS_CUSTOM); - unsetenv(TOKEN_ENVVAR); - return 0; -err_out: - rmdir(TOKEN_BPFFS_CUSTOM); - unsetenv(TOKEN_ENVVAR); - return -EINVAL; -} - -#define bit(n) (1ULL << (n)) - -void test_token(void) -{ - if (test__start_subtest("map_token")) { - struct bpffs_opts opts = { - .cmds_str = "map_create", - .maps_str = "stack", - }; - - subtest_userns(&opts, userns_map_create); - } - if (test__start_subtest("btf_token")) { - struct bpffs_opts opts = { - .cmds = 1ULL << BPF_BTF_LOAD, - }; - - subtest_userns(&opts, userns_btf_load); - } - if (test__start_subtest("prog_token")) { - struct bpffs_opts opts = { - .cmds_str = "PROG_LOAD", - .progs_str = "XDP", - .attachs_str = "xdp", - }; - - subtest_userns(&opts, userns_prog_load); - } - if (test__start_subtest("obj_priv_map")) { - struct bpffs_opts opts = { - .cmds = bit(BPF_MAP_CREATE), - .maps = bit(BPF_MAP_TYPE_QUEUE), - }; - - subtest_userns(&opts, userns_obj_priv_map); - } - if (test__start_subtest("obj_priv_prog")) { - struct bpffs_opts opts = { - .cmds = bit(BPF_PROG_LOAD), - .progs = bit(BPF_PROG_TYPE_KPROBE), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_prog); - } - if (test__start_subtest("obj_priv_btf_fail")) { - struct bpffs_opts opts = { - /* disallow BTF loading */ - .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_btf_fail); - } - if (test__start_subtest("obj_priv_btf_success")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_btf_success); - } - if (test__start_subtest("obj_priv_implicit_token")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_implicit_token); - } - if (test__start_subtest("obj_priv_implicit_token_envvar")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); - } -} diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index ac49ec25211d..d62c5bf00e71 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -14,6 +14,7 @@ #include "verifier_bpf_get_stack.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" +#include "verifier_btf_unreliable_prog.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -125,6 +126,7 @@ void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_u void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } +void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 0fa564a5cc5b..9fe9c4a4e8f6 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -78,7 +78,7 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) } SEC("kretprobe/cgroup_destroy_locked") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("calling kernel function bpf_cgroup_acquire is not allowed") int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) { struct cgroup *acquired; diff --git a/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c new file mode 100644 index 000000000000..624078abf3de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +SEC("freplace/btf_unreliable_kprobe") +/* context type is what BPF verifier expects for kprobe context, but target + * program has `stuct whatever *ctx` argument, so freplace operation will be + * rejected with the following message: + * + * arg0 replace_btf_unreliable_kprobe(struct pt_regs *) doesn't match btf_unreliable_kprobe(struct whatever *) + */ +int replace_btf_unreliable_kprobe(bpf_user_pt_regs_t *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c deleted file mode 100644 index 9085be50f03b..000000000000 --- a/tools/testing/selftests/bpf/progs/priv_map.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ - -#include "vmlinux.h" -#include <bpf/bpf_helpers.h> - -char _license[] SEC("license") = "GPL"; - -struct { - __uint(type, BPF_MAP_TYPE_QUEUE); - __uint(max_entries, 1); - __type(value, __u32); -} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c deleted file mode 100644 index 3c7b2b618c8a..000000000000 --- a/tools/testing/selftests/bpf/progs/priv_prog.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ - -#include "vmlinux.h" -#include <bpf/bpf_helpers.h> - -char _license[] SEC("license") = "GPL"; - -SEC("kprobe") -int kprobe_prog(void *ctx) -{ - return 1; -} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index dcdea3127086..ad88a3796ddf 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -248,7 +248,7 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl } SEC("lsm/task_free") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("R1 must be a rcu pointer") int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) { struct task_struct *acquired; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index b685a4aba6bd..069db9085e78 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -17,7 +17,7 @@ struct generic_map_value { char _license[] SEC("license") = "GPL"; -const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; int err = 0; @@ -166,7 +166,7 @@ static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \ } while (0) -DEFINE_ARRAY_WITH_KPTR(8); +/* kptr doesn't support bin_data_8 which is a zero-sized array */ DEFINE_ARRAY_WITH_KPTR(16); DEFINE_ARRAY_WITH_KPTR(32); DEFINE_ARRAY_WITH_KPTR(64); @@ -198,21 +198,20 @@ int test_batch_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, - * then free 128 8-bytes objects in batch to trigger freeing. + /* Alloc 128 16-bytes objects in batch to trigger refilling, + * then free 128 16-bytes objects in batch to trigger freeing. */ - CALL_BATCH_ALLOC_FREE(8, 128, 0); - CALL_BATCH_ALLOC_FREE(16, 128, 1); - CALL_BATCH_ALLOC_FREE(32, 128, 2); - CALL_BATCH_ALLOC_FREE(64, 128, 3); - CALL_BATCH_ALLOC_FREE(96, 128, 4); - CALL_BATCH_ALLOC_FREE(128, 128, 5); - CALL_BATCH_ALLOC_FREE(192, 128, 6); - CALL_BATCH_ALLOC_FREE(256, 128, 7); - CALL_BATCH_ALLOC_FREE(512, 64, 8); - CALL_BATCH_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_ALLOC_FREE(16, 128, 0); + CALL_BATCH_ALLOC_FREE(32, 128, 1); + CALL_BATCH_ALLOC_FREE(64, 128, 2); + CALL_BATCH_ALLOC_FREE(96, 128, 3); + CALL_BATCH_ALLOC_FREE(128, 128, 4); + CALL_BATCH_ALLOC_FREE(192, 128, 5); + CALL_BATCH_ALLOC_FREE(256, 128, 6); + CALL_BATCH_ALLOC_FREE(512, 64, 7); + CALL_BATCH_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_ALLOC_FREE(4096, 8, 10); return 0; } @@ -223,21 +222,20 @@ int test_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, + /* Alloc 128 16-bytes objects in batch to trigger refilling, * then free these objects through map free. */ - CALL_BATCH_ALLOC(8, 128, 0); - CALL_BATCH_ALLOC(16, 128, 1); - CALL_BATCH_ALLOC(32, 128, 2); - CALL_BATCH_ALLOC(64, 128, 3); - CALL_BATCH_ALLOC(96, 128, 4); - CALL_BATCH_ALLOC(128, 128, 5); - CALL_BATCH_ALLOC(192, 128, 6); - CALL_BATCH_ALLOC(256, 128, 7); - CALL_BATCH_ALLOC(512, 64, 8); - CALL_BATCH_ALLOC(1024, 32, 9); - CALL_BATCH_ALLOC(2048, 16, 10); - CALL_BATCH_ALLOC(4096, 8, 11); + CALL_BATCH_ALLOC(16, 128, 0); + CALL_BATCH_ALLOC(32, 128, 1); + CALL_BATCH_ALLOC(64, 128, 2); + CALL_BATCH_ALLOC(96, 128, 3); + CALL_BATCH_ALLOC(128, 128, 4); + CALL_BATCH_ALLOC(192, 128, 5); + CALL_BATCH_ALLOC(256, 128, 6); + CALL_BATCH_ALLOC(512, 64, 7); + CALL_BATCH_ALLOC(1024, 32, 8); + CALL_BATCH_ALLOC(2048, 16, 9); + CALL_BATCH_ALLOC(4096, 8, 10); return 0; } @@ -251,17 +249,17 @@ int test_batch_percpu_alloc_free(void *ctx) /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, * then free 128 16-bytes per-cpu objects in batch to trigger freeing. */ - CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); - CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); - CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); - CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); - CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); - CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); - CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); - CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 0); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 10); return 0; } @@ -275,17 +273,17 @@ int test_percpu_free_through_map_free(void *ctx) /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. */ - CALL_BATCH_PERCPU_ALLOC(16, 128, 1); - CALL_BATCH_PERCPU_ALLOC(32, 128, 2); - CALL_BATCH_PERCPU_ALLOC(64, 128, 3); - CALL_BATCH_PERCPU_ALLOC(96, 128, 4); - CALL_BATCH_PERCPU_ALLOC(128, 128, 5); - CALL_BATCH_PERCPU_ALLOC(192, 128, 6); - CALL_BATCH_PERCPU_ALLOC(256, 128, 7); - CALL_BATCH_PERCPU_ALLOC(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 11); + CALL_BATCH_PERCPU_ALLOC(16, 128, 0); + CALL_BATCH_PERCPU_ALLOC(32, 128, 1); + CALL_BATCH_PERCPU_ALLOC(64, 128, 2); + CALL_BATCH_PERCPU_ALLOC(96, 128, 3); + CALL_BATCH_PERCPU_ALLOC(128, 128, 4); + CALL_BATCH_PERCPU_ALLOC(192, 128, 5); + CALL_BATCH_PERCPU_ALLOC(256, 128, 6); + CALL_BATCH_PERCPU_ALLOC(512, 64, 7); + CALL_BATCH_PERCPU_ALLOC(1024, 32, 8); + CALL_BATCH_PERCPU_ALLOC(2048, 16, 9); + CALL_BATCH_PERCPU_ALLOC(4096, 8, 10); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c index cc55aedaf82d..257c0569ff98 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func5.c +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -26,7 +26,7 @@ int f3(int val, struct __sk_buff *skb) } SEC("tc") -__failure __msg("expected pointer to ctx, but got PTR") +__failure __msg("expects pointer to ctx") int global_func5(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index ec430b71730b..960998f16306 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1075,4 +1075,66 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("tc") +__description("bounds check with JMP_NE for reg edge") +__success __retval(0) +__naked void reg_not_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 != 0 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* The 4th argument of bpf_skb_store_bytes is defined as \ + * ARG_CONST_SIZE, so 0 is not allowed. The 'r4 != 0' \ + * is providing us this exclusion of zero from initial \ + * [0, 7] range. \ + */ \ + call %[bpf_skb_store_bytes]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("bounds check with JMP_EQ for reg edge") +__success __retval(0) +__naked void reg_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 == 0 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* Just the same as what we do in reg_not_equal_const() */ \ + call %[bpf_skb_store_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c new file mode 100644 index 000000000000..36e033a2e02c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +struct whatever {}; + +SEC("kprobe") +__success __log_level(2) +/* context type is wrong, making it impossible to freplace this program */ +int btf_unreliable_kprobe(struct whatever *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index bd696a431244..9eeb2d89cda8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#include <stdbool.h> -#include <errno.h> -#include <string.h> -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "xdp_metadata.h" +#include "bpf_kfuncs.h" int arr[1]; int unkn_idx; @@ -98,4 +97,96 @@ int unguarded_unsupp_global_called(void) return global_unsupp(&x); } +long stack[128]; + +__weak int subprog_nullable_ptr_bad(int *p) +{ + return (*p) * 2; /* bad, missing null check */ +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("invalid mem access 'mem_or_null'") +int arg_tag_nullable_ptr_fail(void *ctx) +{ + int x = 42; + + return subprog_nullable_ptr_bad(&x); +} + +__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) +{ + return (*p1) * (*p2); /* good, no need for NULL checks */ +} + +int x = 47; + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_nonnull_ptr_good(void *ctx) +{ + int y = 74; + + return subprog_nonnull_ptr_good(&x, &y); +} + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +SEC("?tp") +__success __log_level(2) +int arg_tag_ctx_tp(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +__weak int subprog_dynptr(struct bpf_dynptr *dptr) +{ + long *d, t, buf[1] = {}; + + d = bpf_dynptr_data(dptr, 0, sizeof(long)); + if (!d) + return 0; + + t = *d + 1; + + d = bpf_dynptr_slice(dptr, 0, &buf, sizeof(long)); + if (!d) + return t; + + t = *d + 2; + + return t; +} + +SEC("?xdp") +__success __log_level(2) +int arg_tag_dynptr(struct xdp_md *ctx) +{ + struct bpf_dynptr dptr; + + bpf_dynptr_from_xdp(ctx, 0, &dptr); + + return subprog_dynptr(&dptr); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0617275d93cc..0f456dbab62f 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index cd2fb43eea61..74954f6a8f94 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -6,6 +6,7 @@ export skip_rc=4 export timeout_rc=124 export logfile=/dev/stdout export per_test_logging= +export RUN_IN_NETNS= # Defaults for "settings" file fields: # "timeout" how many seconds to let each test run before running @@ -47,7 +48,7 @@ run_one() { DIR="$1" TEST="$2" - NUM="$3" + local test_num="$3" BASENAME_TEST=$(basename $TEST) @@ -141,6 +142,33 @@ run_one() fi } +in_netns() +{ + local name=$1 + ip netns exec $name bash <<-EOF + BASE_DIR=$BASE_DIR + source $BASE_DIR/kselftest/runner.sh + logfile=$logfile + run_one $DIR $TEST $test_num + EOF +} + +run_in_netns() +{ + local netns=$(mktemp -u ${BASENAME_TEST}-XXXXXX) + local tmplog="/tmp/$(mktemp -u ${BASENAME_TEST}-XXXXXX)" + ip netns add $netns + if [ $? -ne 0 ]; then + echo "# Warning: Create namespace failed for $BASENAME_TEST" + echo "not ok $test_num selftests: $DIR: $BASENAME_TEST # Create NS failed" + fi + ip -n $netns link set lo up + in_netns $netns &> $tmplog + ip netns del $netns &> /dev/null + cat $tmplog + rm -f $tmplog +} + run_many() { echo "TAP version 13" @@ -155,6 +183,12 @@ run_many() logfile="/tmp/$BASENAME_TEST" cat /dev/null > "$logfile" fi - run_one "$DIR" "$TEST" "$test_num" + if [ -n "$RUN_IN_NETNS" ]; then + run_in_netns & + else + run_one "$DIR" "$TEST" "$test_num" + fi done + + wait } diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 118e0964bda9..aa646e0661f3 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -44,26 +44,10 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. -ifeq ("$(origin O)", "command line") - KBUILD_OUTPUT := $(O) +ifeq ($(KHDR_INCLUDES),) +KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include endif -ifneq ($(KBUILD_OUTPUT),) - # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot - # expand a shell special character '~'. We use a somewhat tedious way here. - abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) - $(if $(abs_objtree),, \ - $(error failed to create output directory "$(KBUILD_OUTPUT)")) - # $(realpath ...) resolves symlinks - abs_objtree := $(realpath $(abs_objtree)) - KHDR_DIR := ${abs_objtree}/usr/include -else - abs_srctree := $(shell cd $(top_srcdir) && pwd) - KHDR_DIR := ${abs_srctree}/usr/include -endif - -KHDR_INCLUDES := -isystem $(KHDR_DIR) - # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_GEN_FILES) - -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files +all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) define RUN_TESTS BASE_DIR="$(selfdir)"; \ diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 7324ce5363c0..6f2f83990441 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1680,6 +1680,8 @@ int main(int argc, char **argv) { int err; + ksft_print_header(); + pagesize = getpagesize(); thpsize = read_pmd_pagesize(); if (thpsize) @@ -1689,7 +1691,6 @@ int main(int argc, char **argv) ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 14bd68da7466..50818075e566 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -92,6 +92,7 @@ TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh TEST_PROGS += fq_band_pktlimit.sh +TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index e4e3e9405056..61348f71728c 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -803,11 +803,198 @@ cfg_test_dump() cfg_test_dump_common "L2" l2_grps_get } +# Check flush functionality with different parameters. +cfg_test_flush() +{ + local num_entries + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different port. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10 + + # Different VLAN ID. + bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20 + + # Different routing protocol. + bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp + bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra + + # Different state. + bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent + bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp + + bridge mdb flush dev br0 + num_entries=$(bridge mdb show dev br0 | wc -l) + [[ $num_entries -eq 0 ]] + check_err $? 0 "Not all entries flushed after flush all" + + # Check that when flushing by port only entries programmed with the + # specified port are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 port $swp1 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong port" + + bridge mdb flush dev br0 port br0 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_fail $? "Host entry not flushed by specified port" + + bridge mdb flush dev br0 + + # Check that when flushing by VLAN ID only entries programmed with the + # specified VLAN ID are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20 + + bridge mdb flush dev br0 vid 10 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "Entry not flushed by specified VLAN ID" + bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null + check_err $? "Entry flushed by wrong VLAN ID" + + bridge mdb flush dev br0 + + # Check that all permanent entries are flushed when "permanent" is + # specified and that temporary entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by \"permanent\" state" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 + + # Check that all temporary entries are flushed when "nopermanent" is + # specified and that permanent entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_err $? "Entry flushed by wrong state (\"nopermanent\")" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_fail $? "Entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that L2 host entries are not flushed when "nopermanent" is + # specified, but flushed when "permanent" is specified. + + bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")" + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_fail $? "L2 host entry not flushed by \"permanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv4 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv6 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 proto bgp + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong routing protocol" + + bridge mdb flush dev br0 + + # Test that an error is returned when trying to flush using unsupported + # parameters. + + bridge mdb flush dev br0 src_vni 10 &> /dev/null + check_fail $? "Managed to flush by source VNI" + + bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null + check_fail $? "Managed to flush by destination IP" + + bridge mdb flush dev br0 dst_port 4789 &> /dev/null + check_fail $? "Managed to flush by UDP destination port" + + bridge mdb flush dev br0 vni 10 &> /dev/null + check_fail $? "Managed to flush by destination VNI" + + log_test "Flush tests" +} + cfg_test() { cfg_test_host cfg_test_port cfg_test_dump + cfg_test_flush } __fwd_test_host_ip() @@ -1166,8 +1353,8 @@ ctrl_test() ctrl_mldv2_is_in_test } -if ! bridge mdb help 2>&1 | grep -q "get"; then - echo "SKIP: iproute2 too old, missing bridge mdb get support" +if ! bridge mdb help 2>&1 | grep -q "flush"; then + echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh index 39e736f30322..50d5bfb17ef1 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh @@ -25,6 +25,10 @@ traffic_test() local after= local delta= + if [ ${has_pmac_stats[$if]} = false ]; then + src="aggregate" + fi + before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO @@ -155,15 +159,48 @@ manual_failed_verification_h2_to_h1() manual_failed_verification $h2 $h1 } +smallest_supported_add_frag_size() +{ + local iface=$1 + local rx_min_frag_size= + + rx_min_frag_size=$(ethtool --json --show-mm $iface | \ + jq '.[]."rx-min-frag-size"') + + if [ $rx_min_frag_size -le 60 ]; then + echo 0 + elif [ $rx_min_frag_size -le 124 ]; then + echo 1 + elif [ $rx_min_frag_size -le 188 ]; then + echo 2 + elif [ $rx_min_frag_size -le 252 ]; then + echo 3 + else + echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" + exit 1 + fi +} + +expected_add_frag_size() +{ + local iface=$1 + local requested=$2 + local min=$(smallest_supported_add_frag_size $iface) + + [ $requested -le $min ] && echo $min || echo $requested +} + lldp_change_add_frag_size() { local add_frag_size=$1 + local pattern= lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null # Wait for TLVs to be received sleep 2 - lldptool -i $h2 -t -n -V addEthCaps | \ - grep -q "Additional fragment size: $add_frag_size" + pattern=$(printf "Additional fragment size: %d" \ + $(expected_add_frag_size $h1 $add_frag_size)) + lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" } lldp() @@ -284,6 +321,13 @@ for netif in ${NETIFS[@]}; do echo "SKIP: $netif does not support MAC Merge" exit $ksft_skip fi + + if check_ethtool_pmac_std_stats_support $netif eth-mac; then + has_pmac_stats[$netif]=true + else + has_pmac_stats[$netif]=false + echo "$netif does not report pMAC statistics, falling back to aggregate" + fi done trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e3740163c384..69ef2a40df21 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -155,6 +155,15 @@ check_ethtool_counter_group_support() fi } +check_ethtool_pmac_std_stats_support() +{ + local dev=$1; shift + local grp=$1; shift + + [ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \ + | jq ".[].\"$grp\" | length") ] +} + check_locked_port_support() { if ! bridge -d link show | grep -q " locked"; then diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh index 3224651db97b..5100d90f92d2 100755 --- a/tools/testing/selftests/net/gre_gso.sh +++ b/tools/testing/selftests/net/gre_gso.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking GRE GSO. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS="gre_gso" @@ -13,8 +11,6 @@ TESTS="gre_gso" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns ns1" -NS_EXEC="ip netns exec ns1" TMPFILE=`mktemp` PID= @@ -50,13 +46,13 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up + setup_ns ns1 + IP="ip -netns $ns1" + NS_EXEC="ip netns exec $ns1" ip link add veth0 type veth peer name veth1 ip link set veth0 up - ip link set veth1 netns ns1 + ip link set veth1 netns $ns1 $IP link set veth1 name veth0 $IP link set veth0 up @@ -70,7 +66,7 @@ cleanup() [ -n "$PID" ] && kill $PID ip link del dev gre1 &> /dev/null ip link del dev veth0 &> /dev/null - ip netns del ns1 + cleanup_ns $ns1 } get_linklocal() @@ -145,7 +141,7 @@ gre6_gso_test() setup a1=$(get_linklocal veth0) - a2=$(get_linklocal veth0 ns1) + a2=$(get_linklocal veth0 $ns1) gre_create_tun $a1 $a2 diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 342ad27f631b..19352f106c1d 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -23,11 +23,11 @@ run_test() { # on every try. for tries in {1..3}; do # Actual test starts here - ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ 1>>log.txt & server_pid=$! sleep 0.5 # to allow for socket init - ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ 1>>log.txt wait "${server_pid}" exit_code=$? diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 75e3fdacdf73..0f217a1cc837 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -146,6 +146,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { .so_protocol = IPPROTO_SCTP, }; +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { .so_domain = AF_INET6, .so_type = SOCK_STREAM, @@ -164,6 +170,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { .so_protocol = IPPROTO_SCTP, }; +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + TEST_F(ip_local_port_range, invalid_option_value) { __u16 val16; diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 95b498efacd1..04fcb8a077c9 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() local command="$1" local expected=$2 local msg="$3" - local skip="${4:-SKIP}" + local skip="${4-SKIP}" local nr nr=$(eval $command) @@ -182,6 +182,15 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "$msg" 0 } +# $1: cestab nr +chk_msk_cestab() +{ + local cestab=$1 + + __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ + "${cestab}" "....chk ${cestab} cestab" "" +} + wait_connected() { local listener_ns="${1}" @@ -219,9 +228,11 @@ chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 echo "a" | \ timeout ${timeout_test} \ @@ -237,9 +248,11 @@ echo "b" | \ wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -261,9 +274,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_cestab $((NR_CLIENTS*2)) flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 8362ea454af3..3a5b63026191 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -56,6 +56,8 @@ unset FAILING_LINKS unset test_linkfail unset addr_nr_ns1 unset addr_nr_ns2 +unset cestab_ns1 +unset cestab_ns2 unset sflags unset fastclose unset fullmesh @@ -976,6 +978,34 @@ pm_nl_set_endpoint() fi } +chk_cestab_nr() +{ + local ns=$1 + local cestab=$2 + local count + + print_check "cestab $cestab" + count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$cestab" ]; then + fail_test "got $count current establish[s] expected $cestab" + else + print_ok + fi +} + +# $1 namespace 1, $2 namespace 2 +check_cestab() +{ + if [ -n "${cestab_ns1}" ]; then + chk_cestab_nr ${1} ${cestab_ns1} + fi + if [ -n "${cestab_ns2}" ]; then + chk_cestab_nr ${2} ${cestab_ns2} + fi +} + do_transfer() { local listener_ns="$1" @@ -1089,6 +1119,7 @@ do_transfer() local cpid=$! pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr + check_cestab $listener_ns $connector_ns wait $cpid local retc=$? @@ -2477,47 +2508,52 @@ add_tests() if reset "add single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - addr_nr_ns2=1 speed=slow \ + addr_nr_ns2=1 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 + chk_cestab_nr $ns2 0 fi # add signal address if reset "add signal address"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 - addr_nr_ns1=1 speed=slow \ + addr_nr_ns1=1 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 + chk_cestab_nr $ns1 0 fi # add multiple subflows if reset "add multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple subflows IPv6 if reset "add multiple subflows IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple addresses IPv6 if reset "add multiple addresses IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 - addr_nr_ns1=2 speed=slow \ + addr_nr_ns1=2 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 chk_add_nr 2 2 + chk_cestab_nr $ns1 0 fi } @@ -2756,7 +2792,7 @@ backup_tests() fi if reset "mpc backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2765,7 +2801,7 @@ backup_tests() fi if reset "mpc backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ @@ -2775,7 +2811,7 @@ backup_tests() fi if reset "mpc switch to backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2784,7 +2820,7 @@ backup_tests() fi if reset "mpc switch to backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 7d3d3fc99461..6974474c26f3 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source lib.sh set -o pipefail -NS=netns-name-test DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name @@ -11,7 +11,7 @@ ALT_NAME=some-alt-name RET_CODE=0 cleanup() { - ip netns del $NS + cleanup_ns $NS $test_ns } trap cleanup EXIT @@ -21,50 +21,50 @@ fail() { RET_CODE=1 } -ip netns add $NS +setup_ns NS test_ns # # Test basic move without a rename # ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 || +ip -netns $NS link set dev $DEV netns $test_ns || fail "Can't perform a netns move" -ip link show dev $DEV >> /dev/null || fail "Device not found after move" -ip link del $DEV || fail +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link del $DEV || fail # # Test move with a conflict # -ip link add name $DEV type dummy +ip -netns $test_ns link add name $DEV type dummy ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 2> /dev/null && +ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null && fail "Performed a netns move with a name conflict" -ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" ip -netns $NS link del $DEV || fail -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail # # Test move with a conflict and rename # -ip link add name $DEV type dummy +ip -netns $test_ns link add name $DEV type dummy ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 name $DEV2 || +ip -netns $NS link set dev $DEV netns $test_ns name $DEV2 || fail "Can't perform a netns move with rename" -ip link del $DEV2 || fail -ip link del $DEV || fail +ip -netns $test_ns link del $DEV2 || fail +ip -netns $test_ns link del $DEV || fail # # Test dup alt-name with netns move # -ip link add name $DEV type dummy || fail -ip link property add dev $DEV altname $ALT_NAME || fail +ip -netns $test_ns link add name $DEV type dummy || fail +ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail ip -netns $NS link add name $DEV2 type dummy || fail ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail -ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null && +ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null && fail "Moved with alt-name dup" -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail ip -netns $NS link del $DEV2 || fail # @@ -72,11 +72,11 @@ ip -netns $NS link del $DEV2 || fail # ip -netns $NS link add name $DEV type dummy || fail ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail -ip -netns $NS link set dev $DEV netns 1 || fail -ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" -ip -netns $NS link show dev $ALT_NAME 2> /dev/null && +ip -netns $NS link set dev $DEV netns $test_ns || fail +ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" +ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index b3b2dc5a630c..f10879788f61 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Check that route PMTU values match expectations, and that initial device MTU @@ -198,8 +198,7 @@ # - pmtu_ipv6_route_change # Same as above but with IPv6 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -268,16 +267,6 @@ tests=" pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" -NS_A="ns-A" -NS_B="ns-B" -NS_C="ns-C" -NS_R1="ns-R1" -NS_R2="ns-R2" -ns_a="ip netns exec ${NS_A}" -ns_b="ip netns exec ${NS_B}" -ns_c="ip netns exec ${NS_C}" -ns_r1="ip netns exec ${NS_R1}" -ns_r2="ip netns exec ${NS_R2}" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). @@ -543,13 +532,17 @@ setup_ip6ip6() { } setup_namespaces() { + setup_ns NS_A NS_B NS_C NS_R1 NS_R2 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do - ip netns add ${n} || return 1 - # Disable DAD, so that we don't have to wait to use the # configured IPv6 addresses ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 done + ns_a="ip netns exec ${NS_A}" + ns_b="ip netns exec ${NS_B}" + ns_c="ip netns exec ${NS_C}" + ns_r1="ip netns exec ${NS_R1}" + ns_r2="ip netns exec ${NS_R2}" } setup_veth() { @@ -839,7 +832,7 @@ setup_bridge() { run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C - run_cmd ${ns_c} ip link set veth_A-C netns ns-A + run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A} run_cmd ${ns_a} ip link set veth_A-C up run_cmd ${ns_c} ip link set veth_C-A up @@ -944,9 +937,7 @@ cleanup() { done socat_pids= - for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do - ip netns del ${n} 2> /dev/null - done + cleanup_all_ns ip link del veth_A-C 2>/dev/null ip link del veth_A-R1 2>/dev/null diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 38be9706c45f..a10a32952f21 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -35,8 +35,7 @@ VERBOSE=0 PAUSE=no PAUSE_ON_FAIL=no -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh # set global exit status, but never reset nonzero one. check_err() @@ -297,7 +296,7 @@ kci_test_addrlft() done sleep 5 - run_cmd_grep "10.23.11." ip addr show dev "$devdummy" + run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" if [ $? -eq 0 ]; then check_err 1 end_test "FAIL: preferred_lft addresses remaining" @@ -517,9 +516,8 @@ kci_test_encap_fou() # test various encap methods, use netns to avoid unwanted interference kci_test_encap() { - testns="testns" local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP encap tests: cannot add net namespace $testns" return $ksft_skip @@ -574,6 +572,10 @@ kci_test_macsec_offload() return $ksft_skip fi + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then run_cmd modprobe -q netdevsim @@ -738,6 +740,10 @@ kci_test_ipsec_offload() sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then run_cmd modprobe -q netdevsim @@ -836,11 +842,10 @@ EOF kci_test_gretap() { - testns="testns" DEV_NS=gretap00 local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP gretap tests: cannot add net namespace $testns" return $ksft_skip @@ -878,11 +883,10 @@ kci_test_gretap() kci_test_ip6gretap() { - testns="testns" DEV_NS=ip6gretap00 local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP ip6gretap tests: cannot add net namespace $testns" return $ksft_skip @@ -920,7 +924,6 @@ kci_test_ip6gretap() kci_test_erspan() { - testns="testns" DEV_NS=erspan00 local ret=0 run_cmd_grep "^Usage:" ip link help erspan @@ -928,7 +931,7 @@ kci_test_erspan() end_test "SKIP: erspan: iproute2 too old" return $ksft_skip fi - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP erspan tests: cannot add net namespace $testns" return $ksft_skip @@ -970,7 +973,6 @@ kci_test_erspan() kci_test_ip6erspan() { - testns="testns" DEV_NS=ip6erspan00 local ret=0 run_cmd_grep "^Usage:" ip link help ip6erspan @@ -978,7 +980,7 @@ kci_test_ip6erspan() end_test "SKIP: ip6erspan: iproute2 too old" return $ksft_skip fi - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP ip6erspan tests: cannot add net namespace $testns" return $ksft_skip @@ -1022,8 +1024,6 @@ kci_test_ip6erspan() kci_test_fdb_get() { - IP="ip -netns testns" - BRIDGE="bridge -netns testns" brdev="test-br0" vxlandev="vxlan10" test_mac=de:ad:be:ef:13:37 @@ -1037,11 +1037,13 @@ kci_test_fdb_get() return $ksft_skip fi - run_cmd ip netns add testns + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP fdb get tests: cannot add net namespace $testns" return $ksft_skip fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \ dstport 4789 run_cmd $IP link add name "$brdev" type bridge @@ -1052,7 +1054,7 @@ kci_test_fdb_get() run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev" run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self - ip netns del testns &>/dev/null + ip netns del $testns &>/dev/null if [ $ret -ne 0 ]; then end_test "FAIL: bridge fdb get" diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh index e57bbfbc5208..2070b57849de 100755 --- a/tools/testing/selftests/net/setup_loopback.sh +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -5,6 +5,8 @@ readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" readonly HARD_IRQS="$(< ${IRQ_PATH})" +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) netdev_check_for_carrier() { local -r dev="$1" @@ -97,12 +99,12 @@ setup_interrupt() { setup_ns() { # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}" + setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" } cleanup_ns() { - cleanup_macvlan_ns server_ns server client_ns client + cleanup_macvlan_ns ${server_ns} server ${client_ns} client } setup() { diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index 1003ddf7b3b2..a9a1759e035c 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) + setup_veth_ns() { local -r link_dev="$1" local -r ns_name="$2" @@ -19,14 +22,14 @@ setup_ns() { # Set up server_ns namespace and client_ns namespace ip link add name server type veth peer name client - setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}" - setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}" + setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" } cleanup_ns() { local ns_name - for ns_name in client_ns server_ns; do + for ns_name in ${client_ns} ${server_ns}; do [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" done } diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh index 4de11da4092b..94d5d1a1c90f 100755 --- a/tools/testing/selftests/net/stress_reuseport_listen.sh +++ b/tools/testing/selftests/net/stress_reuseport_listen.sh @@ -2,18 +2,18 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (c) 2022 Meta Platforms, Inc. and affiliates. -NS='stress_reuseport_listen_ns' +source lib.sh NR_FILES=24100 SAVED_NR_FILES=$(ulimit -n) setup() { - ip netns add $NS + setup_ns NS ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1 ulimit -n $NR_FILES } cleanup() { - ip netns del $NS + cleanup_ns $NS ulimit -n $SAVED_NR_FILES } diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 6343cfcf919b..8e60bae67aa9 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -17,22 +17,18 @@ TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS) top_srcdir := ../../../../.. -KSFT_KHDR_INSTALL := 1 include ../../lib.mk HOSTAR ?= ar -# Drop it on port to linux/master with commit 8ce72dc32578 -.DEFAULT_GOAL := all - LIBDIR := $(OUTPUT)/lib LIB := $(LIBDIR)/libaotst.a LDLIBS += $(LIB) -pthread LIBDEPS := lib/aolib.h Makefile CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing -CFLAGS += -I ../../../../../usr/include/ -iquote $(LIBDIR) -CFLAGS += -I ../../../../include/ +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -iquote ./lib/ -I ../../../../include/ # Library LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c @@ -43,6 +39,7 @@ $(LIB): $(LIBOBJ) $(HOSTAR) rcs $@ $^ $(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS) + mkdir -p $(LIBDIR) $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c $(TEST_GEN_PROGS): $(LIB) diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c index 7be8a7d9308c..a1e6e007c291 100644 --- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -46,8 +46,10 @@ static void test_add_routes(union tcp_addr *ips, size_t ips_nr) for (i = 0; i < ips_nr; i++) { union tcp_addr *p = (union tcp_addr *)&ips[i]; + int err; - if (ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p)) + err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p); + if (err && err != -EEXIST) test_error("Failed to add route"); } } diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index 1ca78040d8b7..185a2f6e5ff3 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -55,7 +55,7 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, err = test_wait_fd(lsk, timeout, 0); if (err == -ETIMEDOUT) { if (!fault(TIMEOUT)) - test_fail("timeouted for accept()"); + test_fail("timed out for accept()"); } else if (err < 0) { test_error("test_wait_fd()"); } else { diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c index b731f2c84083..7f108493a29a 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/netlink.c +++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c @@ -261,7 +261,7 @@ static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family, req.nh.nlmsg_seq = seq; req.rt.rtm_family = family; req.rt.rtm_dst_len = (family == AF_INET) ? 32 : 128; - req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_table = vrf; req.rt.rtm_protocol = RTPROT_BOOT; req.rt.rtm_scope = RT_SCOPE_UNIVERSE; req.rt.rtm_type = RTN_UNICAST; @@ -294,8 +294,6 @@ int ip_route_add_vrf(const char *intf, int family, ret = __ip_route_add(route_sock, route_seq++, intf, family, src, dst, vrf); - if (ret == -EEXIST) /* ignoring */ - ret = 0; close(route_sock); return ret; diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c index 2322f4d4676d..2fb6dd8adba6 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/proc.c +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -227,7 +227,7 @@ void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) } if (nsb->counters_nr < nsa->counters_nr) - test_error("Unexpected: some counters dissapeared!"); + test_error("Unexpected: some counters disappeared!"); for (j = 0, i = 0; i < nsb->counters_nr; i++) { if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) { @@ -244,7 +244,7 @@ void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) j++; } if (j != nsa->counters_nr) - test_error("Unexpected: some counters dissapeared!"); + test_error("Unexpected: some counters disappeared!"); nsb = nsb->next; nsa = nsa->next; diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index 374b27c26ebd..92276f916f2f 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -277,22 +277,38 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix, /* /proc/sys/net/core/optmem_max artifically limits the amount of memory * that can be allocated with sock_kmalloc() on each socket in the system. - * It is not virtualized, so it has to written outside test namespaces. - * To be nice a test will revert optmem back to the old value. + * It is not virtualized in v6.7, so it has to written outside test + * namespaces. To be nice a test will revert optmem back to the old value. * Keeping it simple without any file lock, which means the tests that * need to set/increase optmem value shouldn't run in parallel. * Also, not re-entrant. + * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max") + * it is per-namespace, keeping logic for non-virtualized optmem_max + * for v6.7, which supports TCP-AO. */ static const char *optmem_file = "/proc/sys/net/core/optmem_max"; static size_t saved_optmem; +static int optmem_ns = -1; + +static bool is_optmem_namespaced(void) +{ + if (optmem_ns == -1) { + int old_ns = switch_save_ns(nsfd_child); + + optmem_ns = !access(optmem_file, F_OK); + switch_ns(old_ns); + } + return !!optmem_ns; +} size_t test_get_optmem(void) { + int old_ns = 0; FILE *foptmem; - int old_ns; size_t ret; - old_ns = switch_save_ns(nsfd_outside); + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); foptmem = fopen(optmem_file, "r"); if (!foptmem) test_error("failed to open %s", optmem_file); @@ -300,19 +316,21 @@ size_t test_get_optmem(void) if (fscanf(foptmem, "%zu", &ret) != 1) test_error("can't read from %s", optmem_file); fclose(foptmem); - switch_ns(old_ns); + if (!is_optmem_namespaced()) + switch_ns(old_ns); return ret; } static void __test_set_optmem(size_t new, size_t *old) { + int old_ns = 0; FILE *foptmem; - int old_ns; if (old != NULL) *old = test_get_optmem(); - old_ns = switch_save_ns(nsfd_outside); + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); foptmem = fopen(optmem_file, "w"); if (!foptmem) test_error("failed to open %s", optmem_file); @@ -320,7 +338,8 @@ static void __test_set_optmem(size_t new, size_t *old) if (fprintf(foptmem, "%zu", new) <= 0) test_error("can't write %zu to %s", new, optmem_file); fclose(foptmem); - switch_ns(old_ns); + if (!is_optmem_namespaced()) + switch_ns(old_ns); } static void test_revert_optmem(void) diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 7e4601b3f6a3..452de131fa3a 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -118,7 +118,7 @@ static void setsockopt_checked(int sk, int optname, void *optval, break; default: break; - }; + } __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst); } @@ -427,7 +427,7 @@ static void test_einval_del_key(void) sk = prepare_defs(TCP_AO_DEL_KEY, &del); del.set_current = 1; - setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-exising current key"); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key"); sk = prepare_defs(TCP_AO_DEL_KEY, &del); del.set_rnext = 1; diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 7cffde02d2be..c5b568cd7d90 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -30,7 +30,7 @@ static void setup_vrfs(void) err = ip_route_add_vrf(veth_name, TEST_FAMILY, this_ip_addr, this_ip_dest, test_vrf_tabid); if (err) - test_error("Failed to add a route to VRF"); + test_error("Failed to add a route to VRF: %d", err); } static void try_accept(const char *tst_name, unsigned int port, @@ -72,7 +72,7 @@ static void try_accept(const char *tst_name, unsigned int port, err = test_wait_fd(lsk, timeout, 0); if (err == -ETIMEDOUT) { if (!fault(TIMEOUT)) - test_fail("timeouted for accept()"); + test_fail("timed out for accept()"); } else if (err < 0) { test_error("test_wait_fd()"); } else { @@ -494,15 +494,14 @@ out: static void client_add_ip(union tcp_addr *client, const char *ip) { - int family = TEST_FAMILY; + int err, family = TEST_FAMILY; if (inet_pton(family, ip, client) != 1) test_error("Can't convert ip address %s", ip); - if (ip_addr_add(veth_name, family, *client, TEST_PREFIX)) - test_error("Failed to add ip address"); - if (ip_route_add(veth_name, family, *client, this_ip_dest)) - test_error("Failed to add route"); + err = ip_addr_add(veth_name, family, *client, TEST_PREFIX); + if (err) + test_error("Failed to add ip address: %d", err); } static void client_add_ips(void) diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 6725fd9157b9..84a05a9e46d8 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -79,6 +79,7 @@ CONTROL_PATH_TESTS=" dump_ipv6_ipv4 dump_ipv4_ipv6 dump_ipv6_ipv6 + flush " DATA_PATH_TESTS=" @@ -968,6 +969,202 @@ dump_ipv6_ipv6() dump_common $ns1 $local_addr $remote_prefix $fn } +flush() +{ + local num_entries + + echo + echo "Control path: Flush" + echo "-------------------" + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different source VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011" + + # Different routing protocol. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010" + + # Different destination IP. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010" + + # Different destination port. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010" + + # Different VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l) + [[ $num_entries -eq 0 ]] + log_test $? 0 "Flush all" + + # Check that entries are flushed when port is specified as the VXLAN + # device and that an error is returned when port is specified as a + # different net device. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0" + log_test $? 255 "Flush by wrong port" + + # Check that when flushing by source VNI only entries programmed with + # the specified source VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by specified source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011" + log_test $? 0 "Flush by unspecified source VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that all entries are flushed when "permanent" is specified and + # that an error is returned when "nopermanent" is specified. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by \"permanent\" state" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent" + log_test $? 255 "Flush by \"nopermanent\" state" + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\"" + log_test $? 1 "Flush by specified routing protocol" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\"" + log_test $? 0 "Flush by unspecified routing protocol" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination IP only entries programmed + # with the specified destination IP are flushed and the rest are not. + + # IPv4. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 1 "Flush by specified destination IP - IPv4" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 0 "Flush by unspecified destination IP - IPv4" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # IPv6. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2" + log_test $? 1 "Flush by specified destination IP - IPv6" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1" + log_test $? 0 "Flush by unspecified destination IP - IPv6" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by UDP destination port only entries + # programmed with the specified port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\"" + log_test $? 1 "Flush by specified UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\"" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a UDP destination port for an entry, traffic is + # encapsulated with the device's UDP destination port. Check that when + # flushing by the device's UDP destination port only entries programmed + # with this port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by device's UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination VNI only entries programmed + # with the specified destination VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\"" + log_test $? 1 "Flush by specified destination VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\"" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a destination VNI for an entry, traffic is + # encapsulated with the source VNI. Check that when flushing by a + # destination VNI that is equal to the source VNI only such entries are + # flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by destination VNI equal to source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Test that an error is returned when trying to flush using VLAN ID. + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10" + log_test $? 255 "Flush by VLAN ID" +} + ################################################################################ # Tests - Data path @@ -2292,9 +2489,9 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi -bridge mdb help 2>&1 | grep -q "get" +bridge mdb help 2>&1 | grep -q "flush" if [ $? -ne 0 ]; then - echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support" + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index da5bfd834eff..8ff172f7bb1b 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -147,14 +147,14 @@ setup() { setup_loopback_environment "${DEV}" # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${DEV}" server_ns server \ + setup_macvlan_ns "${DEV}" $server_ns server \ "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" client_ns client \ + setup_macvlan_ns "${DEV}" $client_ns client \ "${CLIENT_MAC}" "${CLIENT_IP}" } cleanup() { - cleanup_macvlan_ns server_ns server client_ns client + cleanup_macvlan_ns $server_ns server $client_ns client cleanup_loopback "${DEV}" } @@ -170,22 +170,22 @@ if [[ "${TEST_RSS}" = true ]]; then # RPS/RFS must be disabled because they move packets between cpus, # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ -C "$(get_rx_irq_cpus)" -s -v & elif [[ ! -z "${RPS_MAP}" ]]; then eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ -r "0x${RPS_MAP}" -s -v & else - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & fi server_pid=$! -ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ +ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & client_pid=$! diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index b7a2cb9e7477..f52aa5f7da52 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project @@ -28,7 +28,7 @@ # These tests provide an easy way to flip the expected result of any # of these behaviors for testing kernel patches that change them. -source ./lib.sh +source lib.sh # nettest can be run from PATH or from same directory as this selftest if ! which nettest >/dev/null; then diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh new file mode 100755 index 000000000000..7bc804ffaf7c --- /dev/null +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +ret=0 + +cleanup() { + ip netns del $NETNS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +ip netns add ${NETNS} +ip netns exec ${NETNS} ip link add bond0 type bond mode 0 +ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 +ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 +ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off +ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 +ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 +ip netns exec ${NETNS} ip link set bond_slave_1 nomaster +ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + +exit $ret diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index bdf450eaf60c..457789530645 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -18,8 +18,7 @@ # ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) # ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh ret=0 policy_checks_ok=1 @@ -204,24 +203,24 @@ check_xfrm() { ip=$2 local lret=0 - ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? -ne $rval ] ; then lret=1 fi - ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? -ne $rval ] ; then lret=1 fi @@ -270,11 +269,11 @@ check_hthresh_repeat() i=0 for i in $(seq 1 10);do - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break done if [ $i -ne 10 ] ;then @@ -347,79 +346,80 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -for i in 1 2 3 4; do - ip netns add ns$i - ip -net ns$i link set lo up -done +setup_ns ns1 ns2 ns3 ns4 +ns[1]=$ns1 +ns[2]=$ns2 +ns[3]=$ns3 +ns[4]=$ns4 DEV=veth0 -ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 -ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 +ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]} +ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]} -ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 +ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]} DEV=veth0 for i in 1 2; do - ip -net ns$i link set $DEV up - ip -net ns$i addr add 10.0.$i.2/24 dev $DEV - ip -net ns$i addr add dead:$i::2/64 dev $DEV - - ip -net ns$i addr add 10.0.$i.253 dev $DEV - ip -net ns$i addr add 10.0.$i.254 dev $DEV - ip -net ns$i addr add dead:$i::fd dev $DEV - ip -net ns$i addr add dead:$i::fe dev $DEV + ip -net ${ns[$i]} link set $DEV up + ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV + + ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV + ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV done for i in 3 4; do -ip -net ns$i link set eth1 up -ip -net ns$i link set veth0 up + ip -net ${ns[$i]} link set eth1 up + ip -net ${ns[$i]} link set veth0 up done -ip -net ns1 route add default via 10.0.1.1 -ip -net ns2 route add default via 10.0.2.1 +ip -net ${ns[1]} route add default via 10.0.1.1 +ip -net ${ns[2]} route add default via 10.0.2.1 -ip -net ns3 addr add 10.0.1.1/24 dev eth1 -ip -net ns3 addr add 10.0.3.1/24 dev veth0 -ip -net ns3 addr add 2001:1::1/64 dev eth1 -ip -net ns3 addr add 2001:3::1/64 dev veth0 +ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1 +ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0 +ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1 +ip -net ${ns[3]} addr add 2001:3::1/64 dev veth0 -ip -net ns3 route add default via 10.0.3.10 +ip -net ${ns[3]} route add default via 10.0.3.10 -ip -net ns4 addr add 10.0.2.1/24 dev eth1 -ip -net ns4 addr add 10.0.3.10/24 dev veth0 -ip -net ns4 addr add 2001:2::1/64 dev eth1 -ip -net ns4 addr add 2001:3::10/64 dev veth0 -ip -net ns4 route add default via 10.0.3.1 +ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1 +ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0 +ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1 +ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0 +ip -net ${ns[4]} route add default via 10.0.3.1 for j in 4 6; do for i in 3 4;do - ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null - ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null done done # abuse iptables rule counter to check if ping matches a policy -ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec -ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec if [ $? -ne 0 ];then echo "SKIP: Could not insert iptables rule" - for i in 1 2 3 4;do ip netns del ns$i;done + cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ksft_skip fi # localip remoteip localnet remotenet -do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 -do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 -do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 -do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 +do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 -do_dummies4 ns3 -do_dummies6 ns4 +do_dummies4 ${ns[3]} +do_dummies6 ${ns[4]} -do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 -do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 -do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 -do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 +do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64 +do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64 # ping to .254 should use ipsec, exception is not installed. check_xfrm 1 254 @@ -432,11 +432,11 @@ fi # installs exceptions # localip remoteip encryptdst plaindst -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 -do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 -do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 -do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 +do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 check_exceptions "exceptions" if [ $? -ne 0 ]; then @@ -444,14 +444,14 @@ if [ $? -ne 0 ]; then fi # insert block policies with adjacent/overlapping netmasks -do_overlap ns3 +do_overlap ${ns[3]} check_exceptions "exceptions and block policies" if [ $? -ne 0 ]; then ret=1 fi -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 sleep $((RANDOM%5)) done @@ -459,19 +459,19 @@ done check_exceptions "exceptions and block policies after hresh changes" # full flush of policy db, check everything gets freed incl. internal meta data -ip -net ns3 xfrm policy flush +ip -net ${ns[3]} xfrm policy flush -do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 # move inexact policies to hash table -ip -net ns3 xfrm policy set hthresh4 16 16 +ip -net ${ns[3]} xfrm policy set hthresh4 16 16 sleep $((RANDOM%5)) check_exceptions "exceptions and block policies after hthresh change in ns3" # restore original hthresh settings -- move policies back to tables -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 sleep $((RANDOM%5)) done @@ -479,8 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" -check_random_order ns3 "policies inserted in random order" +check_random_order ${ns[3]} "policies inserted in random order" -for i in 1 2 3 4;do ip netns del ns$i;done +cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ret diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4b2928e1c19d..c2229b3e40d4 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -2,3 +2,5 @@ nf-queue connect_close audit_logread +conntrack_dump_flush +sctp_collision diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index bced422b78f7..db27153eb4a0 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -14,6 +14,7 @@ HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \ + conntrack_dump_flush include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c new file mode 100644 index 000000000000..f18c6db13bbf --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <time.h> +#include <libmnl/libmnl.h> +#include <netinet/ip.h> + +#include <linux/netlink.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include "../kselftest_harness.h" + +#define TEST_ZONE_ID 123 +#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2) + +static int reply_counter; + +static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, + uint32_t src_ip, uint32_t dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip); + mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, + struct in6_addr src_ip, struct in6_addr dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip); + mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_proto(struct nlmsghdr *nlh) +{ + struct nlattr *nest, *nest_proto; + + nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO); + if (!nest) + return -1; + + nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *rplnlh; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + ret = build_cta_proto(nlh); + if (ret < 0) { + perror("build_cta_proto"); + return -1; + } + mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000)); + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + return -1; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + if (errno == EEXIST) { + /* The entries are probably still there from a previous + * run. So we are good + */ + return 0; + } + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip, + uint32_t dst_ip, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int conntrack_data_generate_v6(struct mnl_socket *sock, + struct in6_addr src_ip, + struct in6_addr dst_ip, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET6; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int count_entries(const struct nlmsghdr *nlh, void *data) +{ + reply_counter++; +} + +static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + reply_counter = 0; + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + while (ret > 0) { + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, + count_entries, NULL); + if (ret <= MNL_CB_STOP) + break; + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + } + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + return reply_counter; +} + +static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +FIXTURE(conntrack_dump_flush) +{ + struct mnl_socket *sock; +}; + +FIXTURE_SETUP(conntrack_dump_flush) +{ + struct in6_addr src, dst; + int ret; + + self->sock = mnl_socket_open(NETLINK_NETFILTER); + if (!self->sock) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + if (ret < 0 && errno == EPERM) + SKIP(return, "Needs to be run as root"); + else if (ret < 0 && errno == EOPNOTSUPP) + SKIP(return, "Kernel does not seem to support conntrack zones"); + + ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x01000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x02000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x03000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x04000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x05000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x06000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_GE(ret, 2); + if (ret > 2) + SKIP(return, "kernel does not support filtering by zone"); +} + +FIXTURE_TEARDOWN(conntrack_dump_flush) +{ +} + +TEST_F(conntrack_dump_flush, test_dump_by_zone) +{ + int ret; + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 92743980e553..a28c1416cb89 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -20,11 +20,13 @@ usage() { cat <<EOF Usage: $0 [OPTIONS] - -s | --summary Print summary with detailed log in output.log + -s | --summary Print summary with detailed log in output.log (conflict with -p) + -p | --per_test_log Print test log in /tmp with each test name (conflict with -s) -t | --test COLLECTION:TEST Run TEST from COLLECTION -c | --collection COLLECTION Run all tests from COLLECTION -l | --list List the available collection:test entries -d | --dry-run Don't actually run any tests + -n | --netns Run each test in namespace -h | --help Show this usage info -o | --override-timeout Number of seconds after which we timeout EOF @@ -41,6 +43,9 @@ while true; do logfile="$BASE_DIR"/output.log cat /dev/null > $logfile shift ;; + -p | --per-test-log) + per_test_logging=1 + shift ;; -t | --test) TESTS="$TESTS $2" shift 2 ;; @@ -53,6 +58,9 @@ while true; do -d | --dry-run) dryrun="echo" shift ;; + -n | --netns) + RUN_IN_NETNS=1 + shift ;; -o | --override-timeout) kselftest_override_timeout="$2" shift 2 ;; diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 012aa33b341b..c60acba951c2 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -82,7 +82,6 @@ CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 407fa53822a0..c53ede8b730d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -20,7 +20,6 @@ try_modprobe act_ct try_modprobe act_ctinfo try_modprobe act_gact try_modprobe act_gate -try_modprobe act_ipt try_modprobe act_mirred try_modprobe act_mpls try_modprobe act_nat |