summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/core-api/idr.rst2
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ksz90x1.txt28
-rw-r--r--Documentation/networking/00-INDEX20
-rw-r--r--Documentation/networking/e100.rst3
-rw-r--r--Documentation/networking/e1000.rst3
-rw-r--r--Documentation/networking/e1000e.rst382
-rw-r--r--Documentation/networking/e1000e.txt312
-rw-r--r--Documentation/networking/fm10k.rst141
-rw-r--r--Documentation/networking/i40e.rst770
-rw-r--r--Documentation/networking/i40e.txt190
-rw-r--r--Documentation/networking/iavf.rst281
-rw-r--r--Documentation/networking/iavf.txt56
-rw-r--r--Documentation/networking/ice.rst45
-rw-r--r--Documentation/networking/ice.txt39
-rw-r--r--Documentation/networking/igb.rst193
-rw-r--r--Documentation/networking/igb.txt129
-rw-r--r--Documentation/networking/igbvf.rst64
-rw-r--r--Documentation/networking/igbvf.txt80
-rw-r--r--Documentation/networking/index.rst10
-rw-r--r--Documentation/networking/ixgb.rst467
-rw-r--r--Documentation/networking/ixgb.txt433
-rw-r--r--Documentation/networking/ixgbe.rst527
-rw-r--r--Documentation/networking/ixgbe.txt349
-rw-r--r--Documentation/networking/ixgbevf.rst66
-rw-r--r--Documentation/networking/ixgbevf.txt52
-rw-r--r--LICENSES/other/CC-BY-SA-4.0397
-rw-r--r--MAINTAINERS24
-rw-r--r--Makefile2
-rw-r--r--arch/arm/kvm/coproc.c8
-rw-r--r--arch/arm64/kernel/perf_event.c7
-rw-r--r--arch/arm64/kernel/setup.c56
-rw-r--r--arch/parisc/kernel/unwind.c2
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h4
-rw-r--r--arch/sparc/include/asm/cpudata_64.h2
-rw-r--r--arch/sparc/include/uapi/asm/unistd.h3
-rw-r--r--arch/sparc/kernel/auxio_64.c4
-rw-r--r--arch/sparc/kernel/perf_event.c26
-rw-r--r--arch/sparc/kernel/power.c4
-rw-r--r--arch/sparc/kernel/prom_32.c26
-rw-r--r--arch/sparc/kernel/prom_64.c68
-rw-r--r--arch/sparc/kernel/rtrap_64.S3
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S4
-rw-r--r--arch/sparc/vdso/vclock_gettime.c12
-rw-r--r--arch/sparc/vdso/vma.c4
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/entry/entry_32.S13
-rw-r--r--arch/x86/entry/entry_64.S13
-rw-r--r--arch/x86/include/asm/fpu/internal.h2
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/kernel/fpu/signal.c1
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/tsc.c6
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--block/blk-lib.c28
-rw-r--r--block/blk-wbt.c2
-rw-r--r--drivers/atm/eni.c3
-rw-r--r--drivers/atm/zatm.c42
-rw-r--r--drivers/block/sunvdc.c14
-rw-r--r--drivers/bluetooth/btsdio.c14
-rw-r--r--drivers/bluetooth/btusb.c1
-rw-r--r--drivers/bluetooth/hci_qca.c19
-rw-r--r--drivers/clk/sunxi-ng/ccu-sun4i-a10.c10
-rw-r--r--drivers/crypto/chelsio/chcr_core.c4
-rw-r--r--drivers/gpu/drm/drm_atomic.c5
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c12
-rw-r--r--drivers/gpu/drm/drm_crtc.c10
-rw-r--r--drivers/gpu/drm/drm_edid.c5
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c91
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_dotclock.c12
-rw-r--r--drivers/i2c/i2c-core-base.c2
-rw-r--r--drivers/infiniband/core/ucm.c3
-rw-r--r--drivers/infiniband/core/ucma.c3
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c31
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c6
-rw-r--r--drivers/infiniband/hw/mlx5/main.c13
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c4
-rw-r--r--drivers/input/mouse/elan_i2c_core.c1
-rw-r--r--drivers/net/bonding/bond_main.c3
-rw-r--r--drivers/net/dsa/bcm_sf2.c6
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c4
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c25
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.h1
-rw-r--r--drivers/net/dsa/qca8k.c6
-rw-r--r--drivers/net/ethernet/amazon/Kconfig2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c18
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c7
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c46
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h2
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c6
-rw-r--r--drivers/net/ethernet/freescale/fec.h4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c137
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h3
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c121
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h27
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c14
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h97
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c23
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_port.c32
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_port.h18
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_tx.c295
-rw-r--r--drivers/net/ethernet/intel/Kconfig42
-rw-r--r--drivers/net/ethernet/intel/Makefile1
-rw-r--r--drivers/net/ethernet/intel/igc/Makefile10
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h443
-rw-r--r--drivers/net/ethernet/intel/igc/igc_base.c541
-rw-r--r--drivers/net/ethernet/intel/igc/igc_base.h107
-rw-r--r--drivers/net/ethernet/intel/igc/igc_defines.h389
-rw-r--r--drivers/net/ethernet/intel/igc/igc_hw.h321
-rw-r--r--drivers/net/ethernet/intel/igc/igc_i225.c490
-rw-r--r--drivers/net/ethernet/intel/igc/igc_i225.h13
-rw-r--r--drivers/net/ethernet/intel/igc/igc_mac.c806
-rw-r--r--drivers/net/ethernet/intel/igc/igc_mac.h41
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c3901
-rw-r--r--drivers/net/ethernet/intel/igc/igc_nvm.c215
-rw-r--r--drivers/net/ethernet/intel/igc/igc_nvm.h14
-rw-r--r--drivers/net/ethernet/intel/igc/igc_phy.c791
-rw-r--r--drivers/net/ethernet/intel/igc/igc_phy.h21
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h221
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/Makefile2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c244
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h47
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/common.h161
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h206
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c152
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h117
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c320
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c892
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c475
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h808
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c208
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c128
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c539
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c419
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c195
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c125
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h89
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c225
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c982
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c249
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c138
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c552
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c51
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c3
-rw-r--r--drivers/net/ethernet/nxp/lpc_eth.c75
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h54
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c259
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c201
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h51
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_vf.c2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c95
-rw-r--r--drivers/net/ethernet/qlogic/qla3xxx.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c103
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c5
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c3
-rw-r--r--drivers/net/ethernet/ti/cpsw.c6
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c6
-rw-r--r--drivers/net/ethernet/wiznet/w5300.c6
-rw-r--r--drivers/net/geneve.c14
-rw-r--r--drivers/net/loopback.c4
-rw-r--r--drivers/net/macvlan.c2
-rw-r--r--drivers/net/phy/mdio-mux-bcm-iproc.c6
-rw-r--r--drivers/net/phy/micrel.c130
-rw-r--r--drivers/net/phy/mscc.c4
-rw-r--r--drivers/net/team/team.c5
-rw-r--r--drivers/net/virtio_net.c5
-rw-r--r--drivers/net/vxlan.c188
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--drivers/perf/arm_pmu.c8
-rw-r--r--drivers/ptp/ptp_chardev.c4
-rw-r--r--drivers/usb/class/cdc-acm.c16
-rw-r--r--drivers/usb/core/devio.c4
-rw-r--r--drivers/usb/gadget/function/f_mass_storage.c3
-rw-r--r--drivers/usb/host/xhci-pci.c6
-rw-r--r--drivers/usb/roles/intel-xhci-usb-role-switch.c2
-rw-r--r--drivers/usb/usbip/vhci_hcd.c57
-rw-r--r--fs/afs/rxrpc.c2
-rw-r--r--fs/afs/server.c2
-rw-r--r--fs/cachefiles/namei.c2
-rw-r--r--fs/dax.c13
-rw-r--r--fs/fat/fatent.c1
-rw-r--r--fs/fscache/cookie.c31
-rw-r--r--fs/fscache/internal.h1
-rw-r--r--fs/fscache/main.c4
-rw-r--r--fs/gfs2/bmap.c6
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--include/drm/drm_atomic.h11
-rw-r--r--include/drm/drm_edid.h6
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/micrel_phy.h1
-rw-r--r--include/linux/mlx5/cq.h1
-rw-r--r--include/linux/mlx5/device.h6
-rw-r--r--include/linux/mlx5/driver.h37
-rw-r--r--include/linux/mlx5/fs.h40
-rw-r--r--include/linux/mlx5/mlx5_ifc.h258
-rw-r--r--include/linux/mlx5/qp.h1
-rw-r--r--include/linux/mlx5/srq.h1
-rw-r--r--include/linux/module.h3
-rw-r--r--include/linux/netfilter/nfnetlink_osf.h3
-rw-r--r--include/linux/netpoll.h4
-rw-r--r--include/linux/perf/arm_pmu.h1
-rw-r--r--include/linux/qed/qed_if.h26
-rw-r--r--include/linux/skbuff.h6
-rw-r--r--include/linux/tracepoint-defs.h6
-rw-r--r--include/linux/tracepoint.h36
-rw-r--r--include/net/bluetooth/l2cap.h19
-rw-r--r--include/net/dst.h10
-rw-r--r--include/net/inet_ecn.h18
-rw-r--r--include/net/ip6_fib.h4
-rw-r--r--include/net/netfilter/nf_flow_table.h2
-rw-r--r--include/net/netfilter/nfnetlink_log.h1
-rw-r--r--include/net/sctp/constants.h5
-rw-r--r--include/net/sctp/sm.h2
-rw-r--r--include/net/sctp/structs.h2
-rw-r--r--include/net/switchdev.h7
-rw-r--r--include/net/vxlan.h64
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h7
-rw-r--r--include/uapi/linux/netfilter/xt_quota.h8
-rw-r--r--include/uapi/linux/sctp.h1
-rw-r--r--kernel/bpf/xskmap.c10
-rw-r--r--kernel/sched/fair.c24
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/trace/preemptirq_delay_test.c10
-rw-r--r--kernel/trace/trace_events_hist.c32
-rw-r--r--kernel/tracepoint.c24
-rw-r--r--lib/test_ida.c4
-rw-r--r--mm/huge_memory.c16
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/mremap.c30
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/bluetooth/hci_event.c38
-rw-r--r--net/bluetooth/l2cap_core.c36
-rw-r--r--net/bpfilter/bpfilter_kern.c6
-rw-r--r--net/bridge/br.c4
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_fdb.c4
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_switchdev.c9
-rw-r--r--net/core/ethtool.c11
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/netpoll.c23
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/dsa/slave.c3
-rw-r--r--net/ipv4/ipmr_base.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c1
-rw-r--r--net/ipv4/tcp_bbr.c77
-rw-r--r--net/ipv4/tcp_output.c13
-rw-r--r--net/ipv4/tcp_ulp.c6
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/ip6_tunnel.c10
-rw-r--r--net/ipv6/mcast.c16
-rw-r--r--net/ipv6/route.c12
-rw-r--r--net/ipv6/udp.c6
-rw-r--r--net/ipv6/xfrm6_policy.c4
-rw-r--r--net/llc/llc_conn.c1
-rw-r--r--net/ncsi/Kconfig6
-rw-r--r--net/ncsi/internal.h9
-rw-r--r--net/ncsi/ncsi-manage.c82
-rw-r--r--net/ncsi/ncsi-pkt.h8
-rw-r--r--net/ncsi/ncsi-rsp.c44
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h2
-rw-r--r--net/netfilter/nf_flow_table_core.c2
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nfnetlink_osf.c46
-rw-r--r--net/netfilter/nft_dup_netdev.c2
-rw-r--r--net/netfilter/nft_flow_offload.c2
-rw-r--r--net/netfilter/nft_fwd_netdev.c4
-rw-r--r--net/netfilter/nft_osf.c15
-rw-r--r--net/netfilter/nft_xfrm.c11
-rw-r--r--net/netfilter/xt_osf.c8
-rw-r--r--net/netfilter/xt_quota.c55
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/local_object.c2
-rw-r--r--net/rxrpc/output.c3
-rw-r--r--net/rxrpc/peer_event.c1
-rw-r--r--net/sched/cls_api.c12
-rw-r--r--net/sched/sch_api.c11
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/input.c1
-rw-r--r--net/sctp/output.c6
-rw-r--r--net/sctp/outqueue.c8
-rw-r--r--net/sctp/socket.c76
-rw-r--r--net/socket.c11
-rw-r--r--net/tipc/group.c1
-rw-r--r--net/tipc/link.c1
-rw-r--r--net/tipc/name_distr.c4
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xfrm/xfrm_hash.h5
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_interface.c6
-rw-r--r--net/xfrm/xfrm_policy.c8
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/include/uapi/linux/kvm.h1
-rw-r--r--tools/lib/api/fs/tracing_path.c4
-rw-r--r--tools/perf/Makefile.config2
-rw-r--r--tools/perf/Makefile.perf2
-rw-r--r--tools/perf/builtin-report.c1
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json16
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json16
-rw-r--r--tools/perf/util/event.c22
-rw-r--r--tools/perf/util/evsel.c3
-rw-r--r--tools/perf/util/pmu.c13
-rw-r--r--tools/perf/util/srcline.c3
-rwxr-xr-xtools/testing/selftests/drivers/usb/usbip/usbip_test.sh4
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc80
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c13
349 files changed, 21980 insertions, 4119 deletions
diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst
index d351e880a2f6..a2738050c4f0 100644
--- a/Documentation/core-api/idr.rst
+++ b/Documentation/core-api/idr.rst
@@ -1,4 +1,4 @@
-.. SPDX-License-Identifier: CC-BY-SA-4.0
+.. SPDX-License-Identifier: GPL-2.0+
=============
ID Allocation
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
index e22d8cfea687..5100358177c9 100644
--- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
+++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
@@ -1,4 +1,4 @@
-Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY
+Micrel KSZ9021/KSZ9031/KSZ9131 Gigabit Ethernet PHY
Some boards require special tuning values, particularly when it comes
to clock delays. You can specify clock delay values in the PHY OF
@@ -64,6 +64,32 @@ KSZ9031:
Attention: The link partner must be configurable as slave otherwise
no link will be established.
+KSZ9131:
+
+ All skew control options are specified in picoseconds. The increment
+ step is 100ps. Unlike KSZ9031, the values represent picoseccond delays.
+ A negative value can be assigned as rxc-skew-psec = <(-100)>;.
+
+ Optional properties:
+
+ Range of the value -700 to 2400, default value 0:
+
+ - rxc-skew-psec : Skew control of RX clock pad
+ - txc-skew-psec : Skew control of TX clock pad
+
+ Range of the value -700 to 800, default value 0:
+
+ - rxdv-skew-psec : Skew control of RX CTL pad
+ - txen-skew-psec : Skew control of TX CTL pad
+ - rxd0-skew-psec : Skew control of RX data 0 pad
+ - rxd1-skew-psec : Skew control of RX data 1 pad
+ - rxd2-skew-psec : Skew control of RX data 2 pad
+ - rxd3-skew-psec : Skew control of RX data 3 pad
+ - txd0-skew-psec : Skew control of TX data 0 pad
+ - txd1-skew-psec : Skew control of TX data 1 pad
+ - txd2-skew-psec : Skew control of TX data 2 pad
+ - txd3-skew-psec : Skew control of TX data 3 pad
+
Examples:
mdio {
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 2d239770b95f..eb0754475dd0 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -70,12 +70,6 @@ driver.txt
- Softnet driver issues.
ena.txt
- info on Amazon's Elastic Network Adapter (ENA)
-e100.txt
- - info on Intel's EtherExpress PRO/100 line of 10/100 boards
-e1000.txt
- - info on Intel's E1000 line of gigabit ethernet boards
-e1000e.txt
- - README for the Intel Gigabit Ethernet Driver (e1000e).
eql.txt
- serial IP load balancing
fib_trie.txt
@@ -94,16 +88,8 @@ generic_netlink.txt
- info on Generic Netlink
gianfar.txt
- Gianfar Ethernet Driver.
-i40e.txt
- - README for the Intel Ethernet Controller XL710 Driver (i40e).
-iavf.txt
- - README for the Intel Ethernet Adaptive Virtual Function Driver (iavf).
ieee802154.txt
- Linux IEEE 802.15.4 implementation, API and drivers
-igb.txt
- - README for the Intel Gigabit Ethernet Driver (igb).
-igbvf.txt
- - README for the Intel Gigabit Ethernet Driver (igbvf).
ip-sysctl.txt
- /proc/sys/net/ipv4/* variables
ip_dynaddr.txt
@@ -120,12 +106,6 @@ ipvs-sysctl.txt
- Per-inode explanation of the /proc/sys/net/ipv4/vs interface.
irda.txt
- where to get IrDA (infrared) utilities and info for Linux.
-ixgb.txt
- - README for the Intel 10 Gigabit Ethernet Driver (ixgb).
-ixgbe.txt
- - README for the Intel 10 Gigabit Ethernet Driver (ixgbe).
-ixgbevf.txt
- - README for the Intel Virtual Function (VF) Driver (ixgbevf).
l2tp.txt
- User guide to the L2TP tunnel protocol.
lapb-module.txt
diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst
index f81111eba9c5..5e2839b4ec92 100644
--- a/Documentation/networking/e100.rst
+++ b/Documentation/networking/e100.rst
@@ -1,4 +1,5 @@
-==============================================================
+.. SPDX-License-Identifier: GPL-2.0+
+
Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
==============================================================
diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst
index f10dd4086921..6379d4d20771 100644
--- a/Documentation/networking/e1000.rst
+++ b/Documentation/networking/e1000.rst
@@ -1,4 +1,5 @@
-===========================================================
+.. SPDX-License-Identifier: GPL-2.0+
+
Linux* Base Driver for Intel(R) Ethernet Network Connection
===========================================================
diff --git a/Documentation/networking/e1000e.rst b/Documentation/networking/e1000e.rst
new file mode 100644
index 000000000000..33554e5416c5
--- /dev/null
+++ b/Documentation/networking/e1000e.rst
@@ -0,0 +1,382 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Driver for Intel(R) Ethernet Network Connection
+======================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 2008-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Additional Configurations
+- Support
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+
+Command Line Parameters
+=======================
+If the driver is built as a module, the following optional parameters are used
+by entering them on the command line with the modprobe command using this
+syntax::
+
+ modprobe e1000e [<option>=<VAL1>,<VAL2>,...]
+
+There needs to be a <VAL#> for each network port in the system supported by
+this driver. The values will be applied to each instance, in function order.
+For example::
+
+ modprobe e1000e InterruptThrottleRate=16000,16000
+
+In this case, there are two network ports supported by e1000e in the system.
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+NOTE: A descriptor describes a data buffer and attributes related to the data
+buffer. This information is accessed by the hardware.
+
+InterruptThrottleRate
+---------------------
+:Valid Range: 0,1,3,4,100-100000
+:Default Value: 3
+
+Interrupt Throttle Rate controls the number of interrupts each interrupt
+vector can generate per second. Increasing ITR lowers latency at the cost of
+increased CPU utilization, though it may help throughput in some circumstances.
+
+Setting InterruptThrottleRate to a value greater or equal to 100
+will program the adapter to send out a maximum of that many interrupts
+per second, even if more packets have come in. This reduces interrupt
+load on the system and can lower CPU utilization under heavy load,
+but will increase latency as packets are not processed as quickly.
+
+The default behaviour of the driver previously assumed a static
+InterruptThrottleRate value of 8000, providing a good fallback value for
+all traffic types, but lacking in small packet performance and latency.
+The hardware can handle many more small packets per second however, and
+for this reason an adaptive interrupt moderation algorithm was implemented.
+
+The driver has two adaptive modes (setting 1 or 3) in which
+it dynamically adjusts the InterruptThrottleRate value based on the traffic
+that it receives. After determining the type of incoming traffic in the last
+timeframe, it will adjust the InterruptThrottleRate to an appropriate value
+for that traffic.
+
+The algorithm classifies the incoming traffic every interval into
+classes. Once the class is determined, the InterruptThrottleRate value is
+adjusted to suit that traffic type the best. There are three classes defined:
+"Bulk traffic", for large amounts of packets of normal size; "Low latency",
+for small amounts of traffic and/or a significant percentage of small
+packets; and "Lowest latency", for almost completely small packets or
+minimal traffic.
+
+ - 0: Off
+ Turns off any interrupt moderation and may improve small packet latency.
+ However, this is generally not suitable for bulk throughput traffic due
+ to the increased CPU utilization of the higher interrupt rate.
+ - 1: Dynamic mode
+ This mode attempts to moderate interrupts per vector while maintaining
+ very low latency. This can sometimes cause extra CPU utilization. If
+ planning on deploying e1000e in a latency sensitive environment, this
+ parameter should be considered.
+ - 3: Dynamic Conservative mode (default)
+ In dynamic conservative mode, the InterruptThrottleRate value is set to
+ 4000 for traffic that falls in class "Bulk traffic". If traffic falls in
+ the "Low latency" or "Lowest latency" class, the InterruptThrottleRate is
+ increased stepwise to 20000. This default mode is suitable for most
+ applications.
+ - 4: Simplified Balancing mode
+ In simplified mode the interrupt rate is based on the ratio of TX and
+ RX traffic. If the bytes per second rate is approximately equal, the
+ interrupt rate will drop as low as 2000 interrupts per second. If the
+ traffic is mostly transmit or mostly receive, the interrupt rate could
+ be as high as 8000.
+ - 100-100000:
+ Setting InterruptThrottleRate to a value greater or equal to 100
+ will program the adapter to send at most that many interrupts per second,
+ even if more packets have come in. This reduces interrupt load on the
+ system and can lower CPU utilization under heavy load, but will increase
+ latency as packets are not processed as quickly.
+
+NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
+RxAbsIntDelay parameters. In other words, minimizing the receive and/or
+transmit absolute delays does not force the controller to generate more
+interrupts than what the Interrupt Throttle Rate allows.
+
+RxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 0
+
+This value delays the generation of receive interrupts in units of 1.024
+microseconds. Receive interrupt reduction can improve CPU efficiency if
+properly tuned for specific network traffic. Increasing this value adds extra
+latency to frame reception and can end up decreasing the throughput of TCP
+traffic. If the system is reporting dropped receives, this value may be set
+too high, causing the driver to run out of available receive descriptors.
+
+CAUTION: When setting RxIntDelay to a value other than 0, adapters may hang
+(stop transmitting) under certain network conditions. If this occurs a NETDEV
+WATCHDOG message is logged in the system event log. In addition, the
+controller is automatically reset, restoring the network connection. To
+eliminate the potential for the hang ensure that RxIntDelay is set to 0.
+
+RxAbsIntDelay
+-------------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 8
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+receive interrupt is generated. This value ensures that an interrupt is
+generated after the initial packet is received within the set amount of time,
+which is useful only if RxIntDelay is non-zero. Proper tuning, along with
+RxIntDelay, may improve traffic throughput in specific network conditions.
+
+TxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 8
+
+This value delays the generation of transmit interrupts in units of 1.024
+microseconds. Transmit interrupt reduction can improve CPU efficiency if
+properly tuned for specific network traffic. If the system is reporting
+dropped transmits, this value may be set too high causing the driver to run
+out of available transmit descriptors.
+
+TxAbsIntDelay
+-------------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 32
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+transmit interrupt is generated. It is useful only if TxIntDelay is non-zero.
+It ensures that an interrupt is generated after the initial Packet is sent on
+the wire within the set amount of time. Proper tuning, along with TxIntDelay,
+may improve traffic throughput in specific network conditions.
+
+copybreak
+---------
+:Valid Range: 0-xxxxxxx (0=off)
+:Default Value: 256
+
+The driver copies all packets below or equaling this size to a fresh receive
+buffer before handing it up the stack.
+This parameter differs from other parameters because it is a single (not 1,1,1
+etc.) parameter applied to all driver instances and it is also available
+during runtime at /sys/module/e1000e/parameters/copybreak.
+
+To use copybreak, type::
+
+ modprobe e1000e.ko copybreak=128
+
+SmartPowerDownEnable
+--------------------
+:Valid Range: 0,1
+:Default Value: 0 (disabled)
+
+Allows the PHY to turn off in lower power states. The user can turn off this
+parameter in supported chipsets.
+
+KumeranLockLoss
+---------------
+:Valid Range: 0,1
+:Default Value: 1 (enabled)
+
+This workaround skips resetting the PHY at shutdown for the initial silicon
+releases of ICH8 systems.
+
+IntMode
+-------
+:Valid Range: 0-2
+:Default Value: 0
+
+ +-------+----------------+
+ | Value | Interrupt Mode |
+ +=======+================+
+ | 0 | Legacy |
+ +-------+----------------+
+ | 1 | MSI |
+ +-------+----------------+
+ | 2 | MSI-X |
+ +-------+----------------+
+
+IntMode allows load time control over the type of interrupt registered for by
+the driver. MSI-X is required for multiple queue support, and some kernels and
+combinations of kernel .config options will force a lower level of interrupt
+support.
+
+This command will show different values for each type of interrupt::
+
+ cat /proc/interrupts
+
+CrcStripping
+------------
+:Valid Range: 0,1
+:Default Value: 1 (enabled)
+
+Strip the CRC from received packets before sending up the network stack. If
+you have a machine with a BMC enabled but cannot receive IPMI traffic after
+loading or enabling the driver, try disabling this feature.
+
+WriteProtectNVM
+---------------
+:Valid Range: 0,1
+:Default Value: 1 (enabled)
+
+If set to 1, configure the hardware to ignore all write/erase cycles to the
+GbE region in the ICHx NVM (in order to prevent accidental corruption of the
+NVM). This feature can be disabled by setting the parameter to 0 during initial
+driver load.
+
+NOTE: The machine must be power cycled (full off/on) when enabling NVM writes
+via setting the parameter to zero. Once the NVM has been locked (via the
+parameter at 1 when the driver loads) it cannot be unlocked except via power
+cycle.
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system logs.
+
+
+Additional Features and Configurations
+======================================
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+ ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ ip link set mtu 9000 dev eth<x>
+ ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file:
+
+- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x>
+- For SLES: /etc/sysconfig/network/<config_file>
+
+NOTE: The maximum MTU setting for Jumbo Frames is 8996. This value coincides
+with the maximum Jumbo Frames size of 9018 bytes.
+
+NOTE: Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+poor performance or loss of link.
+
+NOTE: The following adapters limit Jumbo Frames sized packets to a maximum of
+4088 bytes:
+
+ - Intel(R) 82578DM Gigabit Network Connection
+ - Intel(R) 82577LM Gigabit Network Connection
+
+The following adapters do not support Jumbo Frames:
+
+ - Intel(R) PRO/1000 Gigabit Server Adapter
+ - Intel(R) PRO/1000 PM Network Connection
+ - Intel(R) 82562G 10/100 Network Connection
+ - Intel(R) 82562G-2 10/100 Network Connection
+ - Intel(R) 82562GT 10/100 Network Connection
+ - Intel(R) 82562GT-2 10/100 Network Connection
+ - Intel(R) 82562V 10/100 Network Connection
+ - Intel(R) 82562V-2 10/100 Network Connection
+ - Intel(R) 82566DC Gigabit Network Connection
+ - Intel(R) 82566DC-2 Gigabit Network Connection
+ - Intel(R) 82566DM Gigabit Network Connection
+ - Intel(R) 82566MC Gigabit Network Connection
+ - Intel(R) 82566MM Gigabit Network Connection
+ - Intel(R) 82567V-3 Gigabit Network Connection
+ - Intel(R) 82577LC Gigabit Network Connection
+ - Intel(R) 82578DC Gigabit Network Connection
+
+NOTE: Jumbo Frames cannot be configured on an 82579-based Network device if
+MACSec is enabled on the system.
+
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+
+https://www.kernel.org/pub/software/network/ethtool/
+
+NOTE: When validating enable/disable tests on some parts (for example, 82578),
+it is necessary to add a few seconds between tests when working with ethtool.
+
+
+Speed and Duplex Configuration
+------------------------------
+In addressing speed and duplex configuration issues, you need to distinguish
+between copper-based adapters and fiber-based adapters.
+
+In the default mode, an Intel(R) Ethernet Network Adapter using copper
+connections will attempt to auto-negotiate with its link partner to determine
+the best setting. If the adapter cannot establish link with the link partner
+using auto-negotiation, you may need to manually configure the adapter and link
+partner to identical settings to establish link and pass packets. This should
+only be needed when attempting to link with an older switch that does not
+support auto-negotiation or one that has been forced to a specific speed or
+duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds
+and higher cannot be forced. Use the autonegotiation advertising setting to
+manually set devices for 1 Gbps and higher.
+
+Speed, duplex, and autonegotiation advertising are configured through the
+ethtool* utility.
+
+Caution: Only experienced network administrators should force speed and duplex
+or change autonegotiation advertising manually. The settings at the switch must
+always match the adapter settings. Adapter performance may suffer or your
+adapter may not operate if you configure the adapter differently from your
+switch.
+
+An Intel(R) Ethernet Network Adapter using fiber-based connections, however,
+will not attempt to auto-negotiate with its link partner since those adapters
+operate only in full duplex and only at their native speed.
+
+
+Enabling Wake on LAN* (WoL)
+---------------------------
+WoL is configured through the ethtool* utility.
+
+WoL will be enabled on the system during the next shut down or reboot. For
+this driver version, in order to enable WoL, the e1000e driver must be loaded
+prior to shutting down or suspending the system.
+
+NOTE: Wake on LAN is only supported on port A for the following devices:
+- Intel(R) PRO/1000 PT Dual Port Network Connection
+- Intel(R) PRO/1000 PT Dual Port Server Connection
+- Intel(R) PRO/1000 PT Dual Port Server Adapter
+- Intel(R) PRO/1000 PF Dual Port Server Adapter
+- Intel(R) PRO/1000 PT Quad Port Server Adapter
+- Intel(R) Gigabit PT Quad Port Server ExpressModule
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/e1000e.txt b/Documentation/networking/e1000e.txt
deleted file mode 100644
index 12089547baed..000000000000
--- a/Documentation/networking/e1000e.txt
+++ /dev/null
@@ -1,312 +0,0 @@
-Linux* Driver for Intel(R) Ethernet Network Connection
-======================================================
-
-Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Command Line Parameters
-- Additional Configurations
-- Support
-
-Identifying Your Adapter
-========================
-
-The e1000e driver supports all PCI Express Intel(R) Gigabit Network
-Connections, except those that are 82575, 82576 and 82580-based*.
-
-* NOTE: The Intel(R) PRO/1000 P Dual Port Server Adapter is supported by
- the e1000 driver, not the e1000e driver due to the 82546 part being used
- behind a PCI Express bridge.
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/go/network/adapter/idguide.htm
-
-For the latest Intel network drivers for Linux, refer to the following
-website. In the search field, enter your adapter name or type, or use the
-networking link on the left to search for your adapter:
-
- http://support.intel.com/support/go/network/adapter/home.htm
-
-Command Line Parameters
-=======================
-
-The default value for each parameter is generally the recommended setting,
-unless otherwise noted.
-
-NOTES: For more information about the InterruptThrottleRate,
- RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay
- parameters, see the application note at:
- http://www.intel.com/design/network/applnots/ap450.htm
-
-InterruptThrottleRate
----------------------
-Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative,
- 4=simplified balancing)
-Default Value: 3
-
-The driver can limit the amount of interrupts per second that the adapter
-will generate for incoming packets. It does this by writing a value to the
-adapter that is based on the maximum amount of interrupts that the adapter
-will generate per second.
-
-Setting InterruptThrottleRate to a value greater or equal to 100
-will program the adapter to send out a maximum of that many interrupts
-per second, even if more packets have come in. This reduces interrupt
-load on the system and can lower CPU utilization under heavy load,
-but will increase latency as packets are not processed as quickly.
-
-The default behaviour of the driver previously assumed a static
-InterruptThrottleRate value of 8000, providing a good fallback value for
-all traffic types, but lacking in small packet performance and latency.
-The hardware can handle many more small packets per second however, and
-for this reason an adaptive interrupt moderation algorithm was implemented.
-
-The driver has two adaptive modes (setting 1 or 3) in which
-it dynamically adjusts the InterruptThrottleRate value based on the traffic
-that it receives. After determining the type of incoming traffic in the last
-timeframe, it will adjust the InterruptThrottleRate to an appropriate value
-for that traffic.
-
-The algorithm classifies the incoming traffic every interval into
-classes. Once the class is determined, the InterruptThrottleRate value is
-adjusted to suit that traffic type the best. There are three classes defined:
-"Bulk traffic", for large amounts of packets of normal size; "Low latency",
-for small amounts of traffic and/or a significant percentage of small
-packets; and "Lowest latency", for almost completely small packets or
-minimal traffic.
-
-In dynamic conservative mode, the InterruptThrottleRate value is set to 4000
-for traffic that falls in class "Bulk traffic". If traffic falls in the "Low
-latency" or "Lowest latency" class, the InterruptThrottleRate is increased
-stepwise to 20000. This default mode is suitable for most applications.
-
-For situations where low latency is vital such as cluster or
-grid computing, the algorithm can reduce latency even more when
-InterruptThrottleRate is set to mode 1. In this mode, which operates
-the same as mode 3, the InterruptThrottleRate will be increased stepwise to
-70000 for traffic in class "Lowest latency".
-
-In simplified mode the interrupt rate is based on the ratio of TX and
-RX traffic. If the bytes per second rate is approximately equal, the
-interrupt rate will drop as low as 2000 interrupts per second. If the
-traffic is mostly transmit or mostly receive, the interrupt rate could
-be as high as 8000.
-
-Setting InterruptThrottleRate to 0 turns off any interrupt moderation
-and may improve small packet latency, but is generally not suitable
-for bulk throughput traffic.
-
-NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
- RxAbsIntDelay parameters. In other words, minimizing the receive
- and/or transmit absolute delays does not force the controller to
- generate more interrupts than what the Interrupt Throttle Rate
- allows.
-
-NOTE: When e1000e is loaded with default settings and multiple adapters
- are in use simultaneously, the CPU utilization may increase non-
- linearly. In order to limit the CPU utilization without impacting
- the overall throughput, we recommend that you load the driver as
- follows:
-
- modprobe e1000e InterruptThrottleRate=3000,3000,3000
-
- This sets the InterruptThrottleRate to 3000 interrupts/sec for
- the first, second, and third instances of the driver. The range
- of 2000 to 3000 interrupts per second works on a majority of
- systems and is a good starting point, but the optimal value will
- be platform-specific. If CPU utilization is not a concern, use
- RX_POLLING (NAPI) and default driver settings.
-
-RxIntDelay
-----------
-Valid Range: 0-65535 (0=off)
-Default Value: 0
-
-This value delays the generation of receive interrupts in units of 1.024
-microseconds. Receive interrupt reduction can improve CPU efficiency if
-properly tuned for specific network traffic. Increasing this value adds
-extra latency to frame reception and can end up decreasing the throughput
-of TCP traffic. If the system is reporting dropped receives, this value
-may be set too high, causing the driver to run out of available receive
-descriptors.
-
-CAUTION: When setting RxIntDelay to a value other than 0, adapters may
- hang (stop transmitting) under certain network conditions. If
- this occurs a NETDEV WATCHDOG message is logged in the system
- event log. In addition, the controller is automatically reset,
- restoring the network connection. To eliminate the potential
- for the hang ensure that RxIntDelay is set to 0.
-
-RxAbsIntDelay
--------------
-Valid Range: 0-65535 (0=off)
-Default Value: 8
-
-This value, in units of 1.024 microseconds, limits the delay in which a
-receive interrupt is generated. Useful only if RxIntDelay is non-zero,
-this value ensures that an interrupt is generated after the initial
-packet is received within the set amount of time. Proper tuning,
-along with RxIntDelay, may improve traffic throughput in specific network
-conditions.
-
-TxIntDelay
-----------
-Valid Range: 0-65535 (0=off)
-Default Value: 8
-
-This value delays the generation of transmit interrupts in units of
-1.024 microseconds. Transmit interrupt reduction can improve CPU
-efficiency if properly tuned for specific network traffic. If the
-system is reporting dropped transmits, this value may be set too high
-causing the driver to run out of available transmit descriptors.
-
-TxAbsIntDelay
--------------
-Valid Range: 0-65535 (0=off)
-Default Value: 32
-
-This value, in units of 1.024 microseconds, limits the delay in which a
-transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
-this value ensures that an interrupt is generated after the initial
-packet is sent on the wire within the set amount of time. Proper tuning,
-along with TxIntDelay, may improve traffic throughput in specific
-network conditions.
-
-Copybreak
----------
-Valid Range: 0-xxxxxxx (0=off)
-Default Value: 256
-
-Driver copies all packets below or equaling this size to a fresh RX
-buffer before handing it up the stack.
-
-This parameter is different than other parameters, in that it is a
-single (not 1,1,1 etc.) parameter applied to all driver instances and
-it is also available during runtime at
-/sys/module/e1000e/parameters/copybreak
-
-SmartPowerDownEnable
---------------------
-Valid Range: 0-1
-Default Value: 0 (disabled)
-
-Allows PHY to turn off in lower power states. The user can set this parameter
-in supported chipsets.
-
-KumeranLockLoss
----------------
-Valid Range: 0-1
-Default Value: 1 (enabled)
-
-This workaround skips resetting the PHY at shutdown for the initial
-silicon releases of ICH8 systems.
-
-IntMode
--------
-Valid Range: 0-2 (0=legacy, 1=MSI, 2=MSI-X)
-Default Value: 2
-
-Allows changing the interrupt mode at module load time, without requiring a
-recompile. If the driver load fails to enable a specific interrupt mode, the
-driver will try other interrupt modes, from least to most compatible. The
-interrupt order is MSI-X, MSI, Legacy. If specifying MSI (IntMode=1)
-interrupts, only MSI and Legacy will be attempted.
-
-CrcStripping
-------------
-Valid Range: 0-1
-Default Value: 1 (enabled)
-
-Strip the CRC from received packets before sending up the network stack. If
-you have a machine with a BMC enabled but cannot receive IPMI traffic after
-loading or enabling the driver, try disabling this feature.
-
-WriteProtectNVM
----------------
-Valid Range: 0,1
-Default Value: 1
-
-If set to 1, configure the hardware to ignore all write/erase cycles to the
-GbE region in the ICHx NVM (in order to prevent accidental corruption of the
-NVM). This feature can be disabled by setting the parameter to 0 during initial
-driver load.
-NOTE: The machine must be power cycled (full off/on) when enabling NVM writes
-via setting the parameter to zero. Once the NVM has been locked (via the
-parameter at 1 when the driver loads) it cannot be unlocked except via power
-cycle.
-
-Additional Configurations
-=========================
-
- Jumbo Frames
- ------------
- Jumbo Frames support is enabled by changing the MTU to a value larger than
- the default of 1500. Use the ifconfig command to increase the MTU size.
- For example:
-
- ifconfig eth<x> mtu 9000 up
-
- This setting is not saved across reboots.
-
- Notes:
-
- - The maximum MTU setting for Jumbo Frames is 9216. This value coincides
- with the maximum Jumbo Frames size of 9234 bytes.
-
- - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
- poor performance or loss of link.
-
- - Some adapters limit Jumbo Frames sized packets to a maximum of
- 4096 bytes and some adapters do not support Jumbo Frames.
-
- - Jumbo Frames cannot be configured on an 82579-based Network device, if
- MACSec is enabled on the system.
-
- ethtool
- -------
- The driver utilizes the ethtool interface for driver configuration and
- diagnostics, as well as displaying statistical information. We
- strongly recommend downloading the latest version of ethtool at:
-
- https://kernel.org/pub/software/network/ethtool/
-
- NOTE: When validating enable/disable tests on some parts (82578, for example)
- you need to add a few seconds between tests when working with ethtool.
-
- Speed and Duplex
- ----------------
- Speed and Duplex are configured through the ethtool* utility. For
- instructions, refer to the ethtool man page.
-
- Enabling Wake on LAN* (WoL)
- ---------------------------
- WoL is configured through the ethtool* utility. For instructions on
- enabling WoL with ethtool, refer to the ethtool man page.
-
- WoL will be enabled on the system during the next shut down or reboot.
- For this driver version, in order to enable WoL, the e1000e driver must be
- loaded when shutting down or rebooting the system.
-
- In most cases Wake On LAN is only supported on port A for multiple port
- adapters. To verify if a port supports Wake on Lan run ethtool eth<X>.
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- www.intel.com/support/
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/fm10k.rst b/Documentation/networking/fm10k.rst
new file mode 100644
index 000000000000..bf5e5942f28d
--- /dev/null
+++ b/Documentation/networking/fm10k.rst
@@ -0,0 +1,141 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Driver for Intel(R) Ethernet Multi-host Controller
+==============================================================
+
+August 20, 2018
+Copyright(c) 2015-2018 Intel Corporation.
+
+Contents
+========
+- Identifying Your Adapter
+- Additional Configurations
+- Performance Tuning
+- Known Issues
+- Support
+
+Identifying Your Adapter
+========================
+The driver in this release is compatible with devices based on the Intel(R)
+Ethernet Multi-host Controller.
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+http://www.intel.com/support
+
+
+Flow Control
+------------
+The Intel(R) Ethernet Switch Host Interface Driver does not support Flow
+Control. It will not send pause frames. This may result in dropped frames.
+
+
+Virtual Functions (VFs)
+-----------------------
+Use sysfs to enable VFs.
+Valid Range: 0-64
+
+For example::
+
+ echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs //enable VFs
+ echo 0 > /sys/class/net/$dev/device/sriov_numvfs //disable VFs
+
+NOTE: Neither the device nor the driver control how VFs are mapped into config
+space. Bus layout will vary by operating system. On operating systems that
+support it, you can check sysfs to find the mapping.
+
+NOTE: When SR-IOV mode is enabled, hardware VLAN filtering and VLAN tag
+stripping/insertion will remain enabled. Please remove the old VLAN filter
+before the new VLAN filter is added. For example::
+
+ ip link set eth0 vf 0 vlan 100 // set vlan 100 for VF 0
+ ip link set eth0 vf 0 vlan 0 // Delete vlan 100
+ ip link set eth0 vf 0 vlan 200 // set a new vlan 200 for VF 0
+
+
+Additional Features and Configurations
+======================================
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+ ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ ip link set mtu 9000 dev eth<x>
+ ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file:
+
+- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x>
+- For SLES: /etc/sysconfig/network/<config_file>
+
+NOTE: The maximum MTU setting for Jumbo Frames is 15342. This value coincides
+with the maximum Jumbo Frames size of 15364 bytes.
+
+NOTE: This driver will attempt to use multiple page sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+
+Generic Receive Offload, aka GRO
+--------------------------------
+The driver supports the in-kernel software implementation of GRO. GRO has
+shown that by coalescing Rx traffic into larger chunks of data, CPU
+utilization can be significantly reduced when under large Rx load. GRO is an
+evolution of the previously-used LRO interface. GRO is able to coalesce
+other protocols besides TCP. It's also safe to use with configurations that
+are problematic for LRO, namely bridging and iSCSI.
+
+
+
+Supported ethtool Commands and Options for Filtering
+----------------------------------------------------
+-n --show-nfc
+ Retrieves the receive network flow classification configurations.
+
+rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6
+ Retrieves the hash options for the specified network traffic type.
+
+-N --config-nfc
+ Configures the receive network flow classification.
+
+rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r
+ Configures the hash options for the specified network traffic type.
+
+- udp4: UDP over IPv4
+- udp6: UDP over IPv6
+- f Hash on bytes 0 and 1 of the Layer 4 header of the rx packet.
+- n Hash on bytes 2 and 3 of the Layer 4 header of the rx packet.
+
+
+Known Issues/Troubleshooting
+============================
+
+Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS under Linux KVM
+---------------------------------------------------------------------------------------
+KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
+includes traditional PCIe devices, as well as SR-IOV-capable devices based on
+the Intel Ethernet Controller XL710.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/i40e.rst b/Documentation/networking/i40e.rst
new file mode 100644
index 000000000000..0cc16c525d10
--- /dev/null
+++ b/Documentation/networking/i40e.rst
@@ -0,0 +1,770 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series
+==================================================================
+
+Intel 40 Gigabit Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+
+- Overview
+- Identifying Your Adapter
+- Intel(R) Ethernet Flow Director
+- Additional Configurations
+- Known Issues
+- Support
+
+
+Driver information can be obtained using ethtool, lspci, and ifconfig.
+Instructions on updating ethtool can be found in the section Additional
+Configurations later in this document.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller X710
+ * Intel(R) Ethernet Controller XL710
+ * Intel(R) Ethernet Network Connection X722
+ * Intel(R) Ethernet Controller XXV710
+
+For the best performance, make sure the latest NVM/FW is installed on your
+device.
+
+For information on how to identify your adapter, and for the latest NVM/FW
+images and Intel network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+SFP+ and QSFP+ Devices
+----------------------
+For information about supported media, refer to this document:
+https://www.intel.com/content/dam/www/public/us/en/documents/release-notes/xl710-ethernet-controller-feature-matrix.pdf
+
+NOTE: Some adapters based on the Intel(R) Ethernet Controller 700 Series only
+support Intel Ethernet Optics modules. On these adapters, other modules are not
+supported and will not function. In all cases Intel recommends using Intel
+Ethernet Optics; other modules may function but are not validated by Intel.
+Contact Intel for supported media types.
+
+NOTE: For connections based on Intel(R) Ethernet Controller 700 Series, support
+is dependent on your system board. Please see your vendor for details.
+
+NOTE: In systems that do not have adequate airflow to cool the adapter and
+optical modules, you must use high temperature optical modules.
+
+Virtual Functions (VFs)
+-----------------------
+Use sysfs to enable VFs. For example::
+
+ #echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs #enable VFs
+ #echo 0 > /sys/class/net/$dev/device/sriov_numvfs #disable VFs
+
+For example, the following instructions will configure PF eth0 and the first VF
+on VLAN 10::
+
+ $ ip link set dev eth0 vf 0 vlan 10
+
+VLAN Tag Packet Steering
+------------------------
+Allows you to send all packets with a specific VLAN tag to a particular SR-IOV
+virtual function (VF). Further, this feature allows you to designate a
+particular VF as trusted, and allows that trusted VF to request selective
+promiscuous mode on the Physical Function (PF).
+
+To set a VF as trusted or untrusted, enter the following command in the
+Hypervisor::
+
+ # ip link set dev eth0 vf 1 trust [on|off]
+
+Once the VF is designated as trusted, use the following commands in the VM to
+set the VF to promiscuous mode.
+
+::
+
+ For promiscuous all:
+ #ip link set eth2 promisc on
+ Where eth2 is a VF interface in the VM
+
+ For promiscuous Multicast:
+ #ip link set eth2 allmulticast on
+ Where eth2 is a VF interface in the VM
+
+NOTE: By default, the ethtool priv-flag vf-true-promisc-support is set to
+"off",meaning that promiscuous mode for the VF will be limited. To set the
+promiscuous mode for the VF to true promiscuous and allow the VF to see all
+ingress traffic, use the following command::
+
+ #ethtool -set-priv-flags p261p1 vf-true-promisc-support on
+
+The vf-true-promisc-support priv-flag does not enable promiscuous mode; rather,
+it designates which type of promiscuous mode (limited or true) you will get
+when you enable promiscuous mode using the ip link commands above. Note that
+this is a global setting that affects the entire device. However,the
+vf-true-promisc-support priv-flag is only exposed to the first PF of the
+device. The PF remains in limited promiscuous mode (unless it is in MFP mode)
+regardless of the vf-true-promisc-support setting.
+
+Now add a VLAN interface on the VF interface::
+
+ #ip link add link eth2 name eth2.100 type vlan id 100
+
+Note that the order in which you set the VF to promiscuous mode and add the
+VLAN interface does not matter (you can do either first). The end result in
+this example is that the VF will get all traffic that is tagged with VLAN 100.
+
+Intel(R) Ethernet Flow Director
+-------------------------------
+The Intel Ethernet Flow Director performs the following tasks:
+
+- Directs receive packets according to their flows to different queues.
+- Enables tight control on routing a flow in the platform.
+- Matches flows and CPU cores for flow affinity.
+- Supports multiple parameters for flexible flow classification and load
+ balancing (in SFP mode only).
+
+NOTE: The Linux i40e driver supports the following flow types: IPv4, TCPv4, and
+UDPv4. For a given flow type, it supports valid combinations of IP addresses
+(source or destination) and UDP/TCP ports (source and destination). For
+example, you can supply only a source IP address, a source IP address and a
+destination port, or any combination of one or more of these four parameters.
+
+NOTE: The Linux i40e driver allows you to filter traffic based on a
+user-defined flexible two-byte pattern and offset by using the ethtool user-def
+and mask fields. Only L3 and L4 flow types are supported for user-defined
+flexible filters. For a given flow type, you must clear all Intel Ethernet Flow
+Director filters before changing the input set (for that flow type).
+
+To enable or disable the Intel Ethernet Flow Director::
+
+ # ethtool -K ethX ntuple <on|off>
+
+When disabling ntuple filters, all the user programmed filters are flushed from
+the driver cache and hardware. All needed filters must be re-added when ntuple
+is re-enabled.
+
+To add a filter that directs packet to queue 2, use -U or -N switch::
+
+ # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1]
+
+To set a filter using only the source and destination IP address::
+
+ # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 action 2 [loc 1]
+
+To see the list of filters currently present::
+
+ # ethtool <-u|-n> ethX
+
+Application Targeted Routing (ATR) Perfect Filters
+--------------------------------------------------
+ATR is enabled by default when the kernel is in multiple transmit queue mode.
+An ATR Intel Ethernet Flow Director filter rule is added when a TCP-IP flow
+starts and is deleted when the flow ends. When a TCP-IP Intel Ethernet Flow
+Director rule is added from ethtool (Sideband filter), ATR is turned off by the
+driver. To re-enable ATR, the sideband can be disabled with the ethtool -K
+option. For example::
+
+ ethtool –K [adapter] ntuple [off|on]
+
+If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a
+TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is
+automatically re-enabled.
+
+Packets that match the ATR rules are counted in fdir_atr_match stats in
+ethtool, which also can be used to verify whether ATR rules still exist.
+
+Sideband Perfect Filters
+------------------------
+Sideband Perfect Filters are used to direct traffic that matches specified
+characteristics. They are enabled through ethtool's ntuple interface. To add a
+new filter use the following command::
+
+ ethtool -U <device> flow-type <type> src-ip <ip> dst-ip <ip> src-port <port> \
+ dst-port <port> action <queue>
+
+Where:
+ <device> - the ethernet device to program
+ <type> - can be ip4, tcp4, udp4, or sctp4
+ <ip> - the ip address to match on
+ <port> - the port number to match on
+ <queue> - the queue to direct traffic towards (-1 discards matching traffic)
+
+Use the following command to display all of the active filters::
+
+ ethtool -u <device>
+
+Use the following command to delete a filter::
+
+ ethtool -U <device> delete <N>
+
+Where <N> is the filter id displayed when printing all the active filters, and
+may also have been specified using "loc <N>" when adding the filter.
+
+The following example matches TCP traffic sent from 192.168.0.1, port 5300,
+directed to 192.168.0.5, port 80, and sends it to queue 7::
+
+ ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 \
+ src-port 5300 dst-port 80 action 7
+
+For each flow-type, the programmed filters must all have the same matching
+input set. For example, issuing the following two commands is acceptable::
+
+ ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10
+
+Issuing the next two commands, however, is not acceptable, since the first
+specifies src-ip and the second specifies dst-ip::
+
+ ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10
+
+The second command will fail with an error. You may program multiple filters
+with the same fields, using different values, but, on one device, you may not
+program two tcp4 filters with different matching fields.
+
+Matching on a sub-portion of a field is not supported by the i40e driver, thus
+partial mask fields are not supported.
+
+The driver also supports matching user-defined data within the packet payload.
+This flexible data is specified using the "user-def" field of the ethtool
+command in the following way:
+
++----------------------------+--------------------------+
+| 31 28 24 20 16 | 15 12 8 4 0 |
++----------------------------+--------------------------+
+| offset into packet payload | 2 bytes of flexible data |
++----------------------------+--------------------------+
+
+For example,
+
+::
+
+ ... user-def 0x4FFFF ...
+
+tells the filter to look 4 bytes into the payload and match that value against
+0xFFFF. The offset is based on the beginning of the payload, and not the
+beginning of the packet. Thus
+
+::
+
+ flow-type tcp4 ... user-def 0x8BEAF ...
+
+would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the
+TCP/IPv4 payload.
+
+Note that ICMP headers are parsed as 4 bytes of header and 4 bytes of payload.
+Thus to match the first byte of the payload, you must actually add 4 bytes to
+the offset. Also note that ip4 filters match both ICMP frames as well as raw
+(unknown) ip4 frames, where the payload will be the L3 payload of the IP4 frame.
+
+The maximum offset is 64. The hardware will only read up to 64 bytes of data
+from the payload. The offset must be even because the flexible data is 2 bytes
+long and must be aligned to byte 0 of the packet payload.
+
+The user-defined flexible offset is also considered part of the input set and
+cannot be programmed separately for multiple filters of the same type. However,
+the flexible data is not part of the input set and multiple filters may use the
+same offset but match against different data.
+
+To create filters that direct traffic to a specific Virtual Function, use the
+"action" parameter. Specify the action as a 64 bit value, where the lower 32
+bits represents the queue number, while the next 8 bits represent which VF.
+Note that 0 is the PF, so the VF identifier is offset by 1. For example::
+
+ ... action 0x800000002 ...
+
+specifies to direct traffic to Virtual Function 7 (8 minus 1) into queue 2 of
+that VF.
+
+Note that these filters will not break internal routing rules, and will not
+route traffic that otherwise would not have been sent to the specified Virtual
+Function.
+
+Setting the link-down-on-close Private Flag
+-------------------------------------------
+When the link-down-on-close private flag is set to "on", the port's link will
+go down when the interface is brought down using the ifconfig ethX down command.
+
+Use ethtool to view and set link-down-on-close, as follows::
+
+ ethtool --show-priv-flags ethX
+ ethtool --set-priv-flags ethX link-down-on-close [on|off]
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set dmesg to eight by entering the following::
+
+ dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+ ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ ip link set mtu 9000 dev eth<x>
+ ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file::
+
+ /etc/sysconfig/network-scripts/ifcfg-eth<x> // for RHEL
+ /etc/sysconfig/network/<config_file> // for SLES
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9702. This value coincides
+with the maximum Jumbo Frames size of 9728 bytes.
+
+NOTE: This driver will attempt to use multiple page sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://www.kernel.org/pub/software/network/ethtool/
+
+Supported ethtool Commands and Options for Filtering
+----------------------------------------------------
+-n --show-nfc
+ Retrieves the receive network flow classification configurations.
+
+rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6
+ Retrieves the hash options for the specified network traffic type.
+
+-N --config-nfc
+ Configures the receive network flow classification.
+
+rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r...
+ Configures the hash options for the specified network traffic type.
+
+udp4 UDP over IPv4
+udp6 UDP over IPv6
+
+f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
+n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
+
+Speed and Duplex Configuration
+------------------------------
+In addressing speed and duplex configuration issues, you need to distinguish
+between copper-based adapters and fiber-based adapters.
+
+In the default mode, an Intel(R) Ethernet Network Adapter using copper
+connections will attempt to auto-negotiate with its link partner to determine
+the best setting. If the adapter cannot establish link with the link partner
+using auto-negotiation, you may need to manually configure the adapter and link
+partner to identical settings to establish link and pass packets. This should
+only be needed when attempting to link with an older switch that does not
+support auto-negotiation or one that has been forced to a specific speed or
+duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds
+and higher cannot be forced. Use the autonegotiation advertising setting to
+manually set devices for 1 Gbps and higher.
+
+NOTE: You cannot set the speed for devices based on the Intel(R) Ethernet
+Network Adapter XXV710 based devices.
+
+Speed, duplex, and autonegotiation advertising are configured through the
+ethtool* utility.
+
+Caution: Only experienced network administrators should force speed and duplex
+or change autonegotiation advertising manually. The settings at the switch must
+always match the adapter settings. Adapter performance may suffer or your
+adapter may not operate if you configure the adapter differently from your
+switch.
+
+An Intel(R) Ethernet Network Adapter using fiber-based connections, however,
+will not attempt to auto-negotiate with its link partner since those adapters
+operate only in full duplex and only at their native speed.
+
+NAPI
+----
+NAPI (Rx polling mode) is supported in the i40e driver.
+For more information on NAPI, see
+https://wiki.linuxfoundation.org/networking/napi
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for i40e. When transmit is enabled,
+pause frames are generated when the receive packet buffer crosses a predefined
+threshold. When receive is enabled, the transmit unit will halt for the time
+delay specified when a pause frame is received.
+
+NOTE: You must have a flow control capable link partner.
+
+Flow Control is on by default.
+
+Use ethtool to change the flow control settings.
+
+To enable or disable Rx or Tx Flow Control::
+
+ ethtool -A eth? rx <on|off> tx <on|off>
+
+Note: This command only enables or disables Flow Control if auto-negotiation is
+disabled. If auto-negotiation is enabled, this command changes the parameters
+used for auto-negotiation with the link partner.
+
+To enable or disable auto-negotiation::
+
+ ethtool -s eth? autoneg <on|off>
+
+Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending
+on your device, you may not be able to change the auto-negotiation setting.
+
+RSS Hash Flow
+-------------
+Allows you to set the hash bytes per flow type and any combination of one or
+more options for Receive Side Scaling (RSS) hash byte configuration.
+
+::
+
+ # ethtool -N <dev> rx-flow-hash <type> <option>
+
+Where <type> is:
+ tcp4 signifying TCP over IPv4
+ udp4 signifying UDP over IPv4
+ tcp6 signifying TCP over IPv6
+ udp6 signifying UDP over IPv6
+And <option> is one or more of:
+ s Hash on the IP source address of the Rx packet.
+ d Hash on the IP destination address of the Rx packet.
+ f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
+ n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+NOTE: This feature can be disabled for a specific Virtual Function (VF)::
+
+ ip link set <pf dev> vf <vf id> spoofchk {off|on}
+
+IEEE 1588 Precision Time Protocol (PTP) Hardware Clock (PHC)
+------------------------------------------------------------
+Precision Time Protocol (PTP) is used to synchronize clocks in a computer
+network. PTP support varies among Intel devices that support this driver. Use
+"ethtool -T <netdev name>" to get a definitive list of PTP capabilities
+supported by the device.
+
+IEEE 802.1ad (QinQ) Support
+---------------------------
+The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
+IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
+"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks
+allow L2 tunneling and the ability to segregate traffic within a particular
+VLAN ID, among other uses.
+
+The following are examples of how to configure 802.1ad (QinQ)::
+
+ ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24
+ ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371
+
+Where "24" and "371" are example VLAN IDs.
+
+NOTES:
+ Receive checksum offloads, cloud filters, and VLAN acceleration are not
+ supported for 802.1ad (QinQ) packets.
+
+VXLAN and GENEVE Overlay HW Offloading
+--------------------------------------
+Virtual Extensible LAN (VXLAN) allows you to extend an L2 network over an L3
+network, which may be useful in a virtualized or cloud environment. Some
+Intel(R) Ethernet Network devices perform VXLAN processing, offloading it from
+the operating system. This reduces CPU utilization.
+
+VXLAN offloading is controlled by the Tx and Rx checksum offload options
+provided by ethtool. That is, if Tx checksum offload is enabled, and the
+adapter has the capability, VXLAN offloading is also enabled.
+
+Support for VXLAN and GENEVE HW offloading is dependent on kernel support of
+the HW offloading features.
+
+Multiple Functions per Port
+---------------------------
+Some adapters based on the Intel Ethernet Controller X710/XL710 support
+multiple functions on a single physical port. Configure these functions through
+the System Setup/BIOS.
+
+Minimum TX Bandwidth is the guaranteed minimum data transmission bandwidth, as
+a percentage of the full physical port link speed, that the partition will
+receive. The bandwidth the partition is awarded will never fall below the level
+you specify.
+
+The range for the minimum bandwidth values is:
+1 to ((100 minus # of partitions on the physical port) plus 1)
+For example, if a physical port has 4 partitions, the range would be:
+1 to ((100 - 4) + 1 = 97)
+
+The Maximum Bandwidth percentage represents the maximum transmit bandwidth
+allocated to the partition as a percentage of the full physical port link
+speed. The accepted range of values is 1-100. The value is used as a limiter,
+should you chose that any one particular function not be able to consume 100%
+of a port's bandwidth (should it be available). The sum of all the values for
+Maximum Bandwidth is not restricted, because no more than 100% of a port's
+bandwidth can ever be used.
+
+NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions
+per Port (MFP) and SR-IOV are enabled. An error from i40e is logged that says
+"add vsi failed for VF N, aq_err 16". To workaround the issue, enable less than
+64 virtual functions (VFs).
+
+Data Center Bridging (DCB)
+--------------------------
+DCB is a configuration Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+Adapter firmware implements LLDP and DCBX protocol agents as per 802.1AB and
+802.1Qaz respectively. The firmware based DCBX agent runs in willing mode only
+and can accept settings from a DCBX capable peer. Software configuration of
+DCBX parameters via dcbtool/lldptool are not supported.
+
+NOTE: Firmware LLDP can be disabled by setting the private flag disable-fw-lldp.
+
+The i40e driver implements the DCB netlink interface layer to allow user-space
+to communicate with the driver and query DCB configuration for the port.
+
+NOTE:
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+Interrupt Rate Limiting
+-----------------------
+:Valid Range: 0-235 (0=no limit)
+
+The Intel(R) Ethernet Controller XL710 family supports an interrupt rate
+limiting mechanism. The user can control, via ethtool, the number of
+microseconds between interrupts.
+
+Syntax::
+
+ # ethtool -C ethX rx-usecs-high N
+
+The range of 0-235 microseconds provides an effective range of 4,310 to 250,000
+interrupts per second. The value of rx-usecs-high can be set independently of
+rx-usecs and tx-usecs in the same ethtool command, and is also independent of
+the adaptive interrupt moderation algorithm. The underlying hardware supports
+granularity in 4-microsecond intervals, so adjacent values may result in the
+same interrupt rate.
+
+One possible use case is the following::
+
+ # ethtool -C ethX adaptive-rx off adaptive-tx off rx-usecs-high 20 rx-usecs \
+ 5 tx-usecs 5
+
+The above command would disable adaptive interrupt moderation, and allow a
+maximum of 5 microseconds before indicating a receive or transmit was complete.
+However, instead of resulting in as many as 200,000 interrupts per second, it
+limits total interrupts per second to 50,000 via the rx-usecs-high parameter.
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required we recommend experimenting with the following settings.
+
+NOTE: For better performance when processing small (64B) frame sizes, try
+enabling Hyper threading in the BIOS in order to increase the number of logical
+cores in the system and subsequently increase the number of queues available to
+the adapter.
+
+Virtualized Environments
+------------------------
+1. Disable XPS on both ends by using the included virt_perf_default script
+or by running the following command as root::
+
+ for file in `ls /sys/class/net/<ethX>/queues/tx-*/xps_cpus`;
+ do echo 0 > $file; done
+
+2. Using the appropriate mechanism (vcpupin) in the vm, pin the cpu's to
+individual lcpu's, making sure to use a set of cpu's included in the
+device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist.
+
+3. Configure as many Rx/Tx queues in the VM as available. Do not rely on
+the default setting of 1.
+
+
+Non-virtualized Environments
+----------------------------
+Pin the adapter's IRQs to specific cores by disabling the irqbalance service
+and using the included set_irq_affinity script. Please see the script's help
+text for further options.
+
+- The following settings will distribute the IRQs across all the cores evenly::
+
+ # scripts/set_irq_affinity -x all <interface1> , [ <interface2>, ... ]
+
+- The following settings will distribute the IRQs across all the cores that are
+ local to the adapter (same NUMA node)::
+
+ # scripts/set_irq_affinity -x local <interface1> ,[ <interface2>, ... ]
+
+For very CPU intensive workloads, we recommend pinning the IRQs to all cores.
+
+For IP Forwarding: Disable Adaptive ITR and lower Rx and Tx interrupts per
+queue using ethtool.
+
+- Setting rx-usecs and tx-usecs to 125 will limit interrupts to about 8000
+ interrupts per second per queue.
+
+::
+
+ # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 125 \
+ tx-usecs 125
+
+For lower CPU utilization: Disable Adaptive ITR and lower Rx and Tx interrupts
+per queue using ethtool.
+
+- Setting rx-usecs and tx-usecs to 250 will limit interrupts to about 4000
+ interrupts per second per queue.
+
+::
+
+ # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 250 \
+ tx-usecs 250
+
+For lower latency: Disable Adaptive ITR and ITR by setting Rx and Tx to 0 using
+ethtool.
+
+::
+
+ # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 0 \
+ tx-usecs 0
+
+Application Device Queues (ADq)
+-------------------------------
+Application Device Queues (ADq) allows you to dedicate one or more queues to a
+specific application. This can reduce latency for the specified application,
+and allow Tx traffic to be rate limited per application. Follow the steps below
+to set ADq.
+
+1. Create traffic classes (TCs). Maximum of 8 TCs can be created per interface.
+The shaper bw_rlimit parameter is optional.
+
+Example: Sets up two tcs, tc0 and tc1, with 16 queues each and max tx rate set
+to 1Gbit for tc0 and 3Gbit for tc1.
+
+::
+
+ # tc qdisc add dev <interface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1
+ queues 16@0 16@16 hw 1 mode channel shaper bw_rlimit min_rate 1Gbit 2Gbit
+ max_rate 1Gbit 3Gbit
+
+map: priority mapping for up to 16 priorities to tcs (e.g. map 0 0 0 0 1 1 1 1
+sets priorities 0-3 to use tc0 and 4-7 to use tc1)
+
+queues: for each tc, <num queues>@<offset> (e.g. queues 16@0 16@16 assigns
+16 queues to tc0 at offset 0 and 16 queues to tc1 at offset 16. Max total
+number of queues for all tcs is 64 or number of cores, whichever is lower.)
+
+hw 1 mode channel: ‘channel’ with ‘hw’ set to 1 is a new new hardware
+offload mode in mqprio that makes full use of the mqprio options, the
+TCs, the queue configurations, and the QoS parameters.
+
+shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
+Totals must be equal or less than port speed.
+
+For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
+monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+
+2. Enable HW TC offload on interface::
+
+ # ethtool -K <interface> hw-tc-offload on
+
+3. Apply TCs to ingress (RX) flow of interface::
+
+ # tc qdisc add dev <interface> ingress
+
+NOTES:
+ - Run all tc commands from the iproute2 <pathtoiproute2>/tc/ directory.
+ - ADq is not compatible with cloud filters.
+ - Setting up channels via ethtool (ethtool -L) is not supported when the
+ TCs are configured using mqprio.
+ - You must have iproute2 latest version
+ - NVM version 6.01 or later is required.
+ - ADq cannot be enabled when any the following features are enabled: Data
+ Center Bridging (DCB), Multiple Functions per Port (MFP), or Sideband
+ Filters.
+ - If another driver (for example, DPDK) has set cloud filters, you cannot
+ enable ADq.
+ - Tunnel filters are not supported in ADq. If encapsulated packets do
+ arrive in non-tunnel mode, filtering will be done on the inner headers.
+ For example, for VXLAN traffic in non-tunnel mode, PCTYPE is identified
+ as a VXLAN encapsulated packet, outer headers are ignored. Therefore,
+ inner headers are matched.
+ - If a TC filter on a PF matches traffic over a VF (on the PF), that
+ traffic will be routed to the appropriate queue of the PF, and will
+ not be passed on the VF. Such traffic will end up getting dropped higher
+ up in the TCP/IP stack as it does not match PF address data.
+ - If traffic matches multiple TC filters that point to different TCs,
+ that traffic will be duplicated and sent to all matching TC queues.
+ The hardware switch mirrors the packet to a VSI list when multiple
+ filters are matched.
+
+
+Known Issues/Troubleshooting
+============================
+
+NOTE: 1 Gb devices based on the Intel(R) Ethernet Network Connection X722 do
+not support the following features:
+
+ * Data Center Bridging (DCB)
+ * QOS
+ * VMQ
+ * SR-IOV
+ * Task Encapsulation offload (VXLAN, NVGRE)
+ * Energy Efficient Ethernet (EEE)
+ * Auto-media detect
+
+Unexpected Issues when the device driver and DPDK share a device
+----------------------------------------------------------------
+Unexpected issues may result when an i40e device is in multi driver mode and
+the kernel driver and DPDK driver are sharing the device. This is because
+access to the global NIC resources is not synchronized between multiple
+drivers. Any change to the global NIC configuration (writing to a global
+register, setting global configuration by AQ, or changing switch modes) will
+affect all ports and drivers on the device. Loading DPDK with the
+"multi-driver" module parameter may mitigate some of the issues.
+
+TC0 must be enabled when setting up DCB on a switch
+---------------------------------------------------
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt
deleted file mode 100644
index c2d6e1824b29..000000000000
--- a/Documentation/networking/i40e.txt
+++ /dev/null
@@ -1,190 +0,0 @@
-Linux Base Driver for the Intel(R) Ethernet Controller XL710 Family
-===================================================================
-
-Intel i40e Linux driver.
-Copyright(c) 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Additional Configurations
-- Performance Tuning
-- Known Issues
-- Support
-
-
-Identifying Your Adapter
-========================
-
-The driver in this release is compatible with the Intel Ethernet
-Controller XL710 Family.
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/network/sb/CS-012904.htm
-
-
-Enabling the driver
-===================
-
-The driver is enabled via the standard kernel configuration system,
-using the make command:
-
- make config/oldconfig/menuconfig/etc.
-
-The driver is located in the menu structure at:
-
- -> Device Drivers
- -> Network device support (NETDEVICES [=y])
- -> Ethernet driver support
- -> Intel devices
- -> Intel(R) Ethernet Controller XL710 Family
-
-Additional Configurations
-=========================
-
- Generic Receive Offload (GRO)
- -----------------------------
- The driver supports the in-kernel software implementation of GRO. GRO has
- shown that by coalescing Rx traffic into larger chunks of data, CPU
- utilization can be significantly reduced when under large Rx load. GRO is
- an evolution of the previously-used LRO interface. GRO is able to coalesce
- other protocols besides TCP. It's also safe to use with configurations that
- are problematic for LRO, namely bridging and iSCSI.
-
- Ethtool
- -------
- The driver utilizes the ethtool interface for driver configuration and
- diagnostics, as well as displaying statistical information. The latest
- ethtool version is required for this functionality.
-
- The latest release of ethtool can be found from
- https://www.kernel.org/pub/software/network/ethtool
-
-
- Flow Director n-ntuple traffic filters (FDir)
- ---------------------------------------------
- The driver utilizes the ethtool interface for configuring ntuple filters,
- via "ethtool -N <device> <filter>".
-
- The sctp4, ip4, udp4, and tcp4 flow types are supported with the standard
- fields including src-ip, dst-ip, src-port and dst-port. The driver only
- supports fully enabling or fully masking the fields, so use of the mask
- fields for partial matches is not supported.
-
- Additionally, the driver supports using the action to specify filters for a
- Virtual Function. You can specify the action as a 64bit value, where the
- lower 32 bits represents the queue number, while the next 8 bits represent
- which VF. Note that 0 is the PF, so the VF identifier is offset by 1. For
- example:
-
- ... action 0x800000002 ...
-
- Would indicate to direct traffic for Virtual Function 7 (8 minus 1) on queue
- 2 of that VF.
-
- The driver also supports using the user-defined field to specify 2 bytes of
- arbitrary data to match within the packet payload in addition to the regular
- fields. The data is specified in the lower 32bits of the user-def field in
- the following way:
-
- +----------------------------+---------------------------+
- | 31 28 24 20 16 | 15 12 8 4 0|
- +----------------------------+---------------------------+
- | offset into packet payload | 2 bytes of flexible data |
- +----------------------------+---------------------------+
-
- As an example,
-
- ... user-def 0x4FFFF ....
-
- means to match the value 0xFFFF 4 bytes into the packet payload. Note that
- the offset is based on the beginning of the payload, and not the beginning
- of the packet. Thus
-
- flow-type tcp4 ... user-def 0x8BEAF ....
-
- would match TCP/IPv4 packets which have the value 0xBEAF 8bytes into the
- TCP/IPv4 payload.
-
- For ICMP, the hardware parses the ICMP header as 4 bytes of header and 4
- bytes of payload, so if you want to match an ICMP frames payload you may need
- to add 4 to the offset in order to match the data.
-
- Furthermore, the offset can only be up to a value of 64, as the hardware
- will only read up to 64 bytes of data from the payload. It must also be even
- as the flexible data is 2 bytes long and must be aligned to byte 0 of the
- packet payload.
-
- When programming filters, the hardware is limited to using a single input
- set for each flow type. This means that it is an error to program two
- different filters with the same type that don't match on the same fields.
- Thus the second of the following two commands will fail:
-
- ethtool -N <device> flow-type tcp4 src-ip 192.168.0.7 action 5
- ethtool -N <device> flow-type tcp4 dst-ip 192.168.15.18 action 1
-
- This is because the first filter will be accepted and reprogram the input
- set for TCPv4 filters, but the second filter will be unable to reprogram the
- input set until all the conflicting TCPv4 filters are first removed.
-
- Note that the user-defined flexible offset is also considered part of the
- input set and cannot be programmed separately for multiple filters of the
- same type. However, the flexible data is not part of the input set and
- multiple filters may use the same offset but match against different data.
-
- Data Center Bridging (DCB)
- --------------------------
- DCB configuration is not currently supported.
-
- FCoE
- ----
- The driver supports Fiber Channel over Ethernet (FCoE) and Data Center
- Bridging (DCB) functionality. Configuring DCB and FCoE is outside the scope
- of this driver doc. Refer to http://www.open-fcoe.org/ for FCoE project
- information and http://www.open-lldp.org/ or email list
- e1000-eedc@lists.sourceforge.net for DCB information.
-
- MAC and VLAN anti-spoofing feature
- ----------------------------------
- When a malicious driver attempts to send a spoofed packet, it is dropped by
- the hardware and not transmitted. An interrupt is sent to the PF driver
- notifying it of the spoof attempt.
-
- When a spoofed packet is detected the PF driver will send the following
- message to the system log (displayed by the "dmesg" command):
-
- Spoof event(s) detected on VF (n)
-
- Where n=the VF that attempted to do the spoofing.
-
-
-Performance Tuning
-==================
-
-An excellent article on performance tuning can be found at:
-
-http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
-
-
-Known Issues
-============
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://e1000.sourceforge.net
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sourceforge.net and copy
-netdev@vger.kernel.org.
diff --git a/Documentation/networking/iavf.rst b/Documentation/networking/iavf.rst
new file mode 100644
index 000000000000..f8b42b64eb28
--- /dev/null
+++ b/Documentation/networking/iavf.rst
@@ -0,0 +1,281 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function
+==================================================================
+
+Intel Ethernet Adaptive Virtual Function Linux driver.
+Copyright(c) 2013-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Additional Configurations
+- Known Issues/Troubleshooting
+- Support
+
+This file describes the iavf Linux* Base Driver. This driver was formerly
+called i40evf.
+
+The iavf driver supports the below mentioned virtual function devices and
+can only be activated on kernels running the i40e or newer Physical Function
+(PF) driver compiled with CONFIG_PCI_IOV. The iavf driver requires
+CONFIG_PCI_MSI to be enabled.
+
+The guest OS loading the iavf driver must support MSI-X interrupts.
+
+Identifying Your Adapter
+========================
+The driver in this kernel is compatible with devices based on the following:
+ * Intel(R) XL710 X710 Virtual Function
+ * Intel(R) X722 Virtual Function
+ * Intel(R) XXV710 Virtual Function
+ * Intel(R) Ethernet Adaptive Virtual Function
+
+For the best performance, make sure the latest NVM/FW is installed on your
+device.
+
+For information on how to identify your adapter, and for the latest NVM/FW
+images and Intel network drivers, refer to the Intel Support website:
+http://www.intel.com/support
+
+
+Additional Features and Configurations
+======================================
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set dmesg to eight by entering the following::
+
+ dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://www.kernel.org/pub/software/network/ethtool/
+
+Setting VLAN Tag Stripping
+--------------------------
+If you have applications that require Virtual Functions (VFs) to receive
+packets with VLAN tags, you can disable VLAN tag stripping for the VF. The
+Physical Function (PF) processes requests issued from the VF to enable or
+disable VLAN tag stripping. Note that if the PF has assigned a VLAN to a VF,
+then requests from that VF to set VLAN tag stripping will be ignored.
+
+To enable/disable VLAN tag stripping for a VF, issue the following command
+from inside the VM in which you are running the VF::
+
+ ethtool -K <if_name> rxvlan on/off
+
+or alternatively::
+
+ ethtool --offload <if_name> rxvlan on/off
+
+Adaptive Virtual Function
+-------------------------
+Adaptive Virtual Function (AVF) allows the virtual function driver, or VF, to
+adapt to changing feature sets of the physical function driver (PF) with which
+it is associated. This allows system administrators to update a PF without
+having to update all the VFs associated with it. All AVFs have a single common
+device ID and branding string.
+
+AVFs have a minimum set of features known as "base mode," but may provide
+additional features depending on what features are available in the PF with
+which the AVF is associated. The following are base mode features:
+
+- 4 Queue Pairs (QP) and associated Configuration Status Registers (CSRs)
+ for Tx/Rx.
+- i40e descriptors and ring format.
+- Descriptor write-back completion.
+- 1 control queue, with i40e descriptors, CSRs and ring format.
+- 5 MSI-X interrupt vectors and corresponding i40e CSRs.
+- 1 Interrupt Throttle Rate (ITR) index.
+- 1 Virtual Station Interface (VSI) per VF.
+- 1 Traffic Class (TC), TC0
+- Receive Side Scaling (RSS) with 64 entry indirection table and key,
+ configured through the PF.
+- 1 unicast MAC address reserved per VF.
+- 16 MAC address filters for each VF.
+- Stateless offloads - non-tunneled checksums.
+- AVF device ID.
+- HW mailbox is used for VF to PF communications (including on Windows).
+
+IEEE 802.1ad (QinQ) Support
+---------------------------
+The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
+IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
+"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks
+allow L2 tunneling and the ability to segregate traffic within a particular
+VLAN ID, among other uses.
+
+The following are examples of how to configure 802.1ad (QinQ)::
+
+ ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24
+ ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371
+
+Where "24" and "371" are example VLAN IDs.
+
+NOTES:
+ Receive checksum offloads, cloud filters, and VLAN acceleration are not
+ supported for 802.1ad (QinQ) packets.
+
+Application Device Queues (ADq)
+-------------------------------
+Application Device Queues (ADq) allows you to dedicate one or more queues to a
+specific application. This can reduce latency for the specified application,
+and allow Tx traffic to be rate limited per application. Follow the steps below
+to set ADq.
+
+1. Create traffic classes (TCs). Maximum of 8 TCs can be created per interface.
+The shaper bw_rlimit parameter is optional.
+
+Example: Sets up two tcs, tc0 and tc1, with 16 queues each and max tx rate set
+to 1Gbit for tc0 and 3Gbit for tc1.
+
+::
+
+ # tc qdisc add dev <interface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1
+ queues 16@0 16@16 hw 1 mode channel shaper bw_rlimit min_rate 1Gbit 2Gbit
+ max_rate 1Gbit 3Gbit
+
+map: priority mapping for up to 16 priorities to tcs (e.g. map 0 0 0 0 1 1 1 1
+sets priorities 0-3 to use tc0 and 4-7 to use tc1)
+
+queues: for each tc, <num queues>@<offset> (e.g. queues 16@0 16@16 assigns
+16 queues to tc0 at offset 0 and 16 queues to tc1 at offset 16. Max total
+number of queues for all tcs is 64 or number of cores, whichever is lower.)
+
+hw 1 mode channel: ‘channel’ with ‘hw’ set to 1 is a new new hardware
+offload mode in mqprio that makes full use of the mqprio options, the
+TCs, the queue configurations, and the QoS parameters.
+
+shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
+Totals must be equal or less than port speed.
+
+For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
+monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+
+2. Enable HW TC offload on interface::
+
+ # ethtool -K <interface> hw-tc-offload on
+
+3. Apply TCs to ingress (RX) flow of interface::
+
+ # tc qdisc add dev <interface> ingress
+
+NOTES:
+ - Run all tc commands from the iproute2 <pathtoiproute2>/tc/ directory.
+ - ADq is not compatible with cloud filters.
+ - Setting up channels via ethtool (ethtool -L) is not supported when the TCs
+ are configured using mqprio.
+ - You must have iproute2 latest version
+ - NVM version 6.01 or later is required.
+ - ADq cannot be enabled when any the following features are enabled: Data
+ Center Bridging (DCB), Multiple Functions per Port (MFP), or Sideband Filters.
+ - If another driver (for example, DPDK) has set cloud filters, you cannot
+ enable ADq.
+ - Tunnel filters are not supported in ADq. If encapsulated packets do arrive
+ in non-tunnel mode, filtering will be done on the inner headers. For example,
+ for VXLAN traffic in non-tunnel mode, PCTYPE is identified as a VXLAN
+ encapsulated packet, outer headers are ignored. Therefore, inner headers are
+ matched.
+ - If a TC filter on a PF matches traffic over a VF (on the PF), that traffic
+ will be routed to the appropriate queue of the PF, and will not be passed on
+ the VF. Such traffic will end up getting dropped higher up in the TCP/IP
+ stack as it does not match PF address data.
+ - If traffic matches multiple TC filters that point to different TCs, that
+ traffic will be duplicated and sent to all matching TC queues. The hardware
+ switch mirrors the packet to a VSI list when multiple filters are matched.
+
+
+Known Issues/Troubleshooting
+============================
+
+Traffic Is Not Being Passed Between VM and Client
+-------------------------------------------------
+You may not be able to pass traffic between a client system and a
+Virtual Machine (VM) running on a separate host if the Virtual Function
+(VF, or Virtual NIC) is not in trusted mode and spoof checking is enabled
+on the VF. Note that this situation can occur in any combination of client,
+host, and guest operating system. For information on how to set the VF to
+trusted mode, refer to the section "VLAN Tag Packet Steering" in this
+readme document. For information on setting spoof checking, refer to the
+section "MAC and VLAN anti-spoofing feature" in this readme document.
+
+Do not unload port driver if VF with active VM is bound to it
+-------------------------------------------------------------
+Do not unload a port's driver if a Virtual Function (VF) with an active Virtual
+Machine (VM) is bound to it. Doing so will cause the port to appear to hang.
+Once the VM shuts down, or otherwise releases the VF, the command will complete.
+
+Virtual machine does not get link
+---------------------------------
+If the virtual machine has more than one virtual port assigned to it, and those
+virtual ports are bound to different physical ports, you may not get link on
+all of the virtual ports. The following command may work around the issue::
+
+ ethtool -r <PF>
+
+Where <PF> is the PF interface in the host, for example: p5p1. You may need to
+run the command more than once to get link on all virtual ports.
+
+MAC address of Virtual Function changes unexpectedly
+----------------------------------------------------
+If a Virtual Function's MAC address is not assigned in the host, then the VF
+(virtual function) driver will use a random MAC address. This random MAC
+address may change each time the VF driver is reloaded. You can assign a static
+MAC address in the host machine. This static MAC address will survive
+a VF driver reload.
+
+Driver Buffer Overflow Fix
+--------------------------
+The fix to resolve CVE-2016-8105, referenced in Intel SA-00069
+https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00069.html
+is included in this and future versions of the driver.
+
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
+Due to the default ARP behavior on Linux, it is not possible to have one system
+on two IP networks in the same Ethernet broadcast domain (non-partitioned
+switch) behave as expected. All Ethernet interfaces will respond to IP traffic
+for any IP address assigned to the system. This results in unbalanced receive
+traffic.
+
+If you have multiple interfaces in a server, either turn on ARP filtering by
+entering::
+
+ echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+NOTE: This setting is not saved across reboots. The configuration change can be
+made permanent by adding the following line to the file /etc/sysctl.conf::
+
+ net.ipv4.conf.all.arp_filter = 1
+
+Another alternative is to install the interfaces in separate broadcast domains
+(either in different switches or in a switch partitioned to VLANs).
+
+Rx Page Allocation Errors
+-------------------------
+'Page allocation failure. order:0' errors may occur under stress.
+This is caused by the way the Linux kernel reports this stressed condition.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/iavf.txt b/Documentation/networking/iavf.txt
deleted file mode 100644
index cc902a2369d6..000000000000
--- a/Documentation/networking/iavf.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Linux* Base Driver for Intel(R) Network Connection
-==================================================
-
-Intel Ethernet Adaptive Virtual Function Linux driver.
-Copyright(c) 2013-2018 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Known Issues/Troubleshooting
-- Support
-
-This file describes the iavf Linux* Base Driver. This driver
-was formerly called i40evf.
-
-The iavf driver supports the below mentioned virtual function
-devices and can only be activated on kernels running the i40e or
-newer Physical Function (PF) driver compiled with CONFIG_PCI_IOV.
-The iavf driver requires CONFIG_PCI_MSI to be enabled.
-
-The guest OS loading the iavf driver must support MSI-X interrupts.
-
-Supported Hardware
-==================
-Intel XL710 X710 Virtual Function
-Intel X722 Virtual Function
-Intel Ethernet Adaptive Virtual Function
-
-Identifying Your Adapter
-========================
-
-For more information on how to identify your adapter, go to the
-Adapter & Driver ID Guide at:
-
- https://www.intel.com/content/www/us/en/support/articles/000005584/network-and-i-o/ethernet-products.html
-
-
-Known Issues/Troubleshooting
-============================
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ice.rst b/Documentation/networking/ice.rst
new file mode 100644
index 000000000000..1e4948c9e989
--- /dev/null
+++ b/Documentation/networking/ice.rst
@@ -0,0 +1,45 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series
+===================================================================
+
+Intel ice Linux driver.
+Copyright(c) 2018 Intel Corporation.
+
+Contents
+========
+
+- Enabling the driver
+- Support
+
+The driver in this release supports Intel's E800 Series of products. For
+more information, visit Intel's support page at https://support.intel.com.
+
+Enabling the driver
+===================
+The driver is enabled via the standard kernel configuration system,
+using the make command::
+
+ make oldconfig/silentoldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet driver support
+ -> Intel devices
+ -> Intel(R) Ethernet Connection E800 Series Support
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ice.txt b/Documentation/networking/ice.txt
deleted file mode 100644
index 6261c46378e1..000000000000
--- a/Documentation/networking/ice.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Intel(R) Ethernet Connection E800 Series Linux Driver
-===================================================================
-
-Intel ice Linux driver.
-Copyright(c) 2018 Intel Corporation.
-
-Contents
-========
-- Enabling the driver
-- Support
-
-The driver in this release supports Intel's E800 Series of products. For
-more information, visit Intel's support page at http://support.intel.com.
-
-Enabling the driver
-===================
-
-The driver is enabled via the standard kernel configuration system,
-using the make command:
-
- Make oldconfig/silentoldconfig/menuconfig/etc.
-
-The driver is located in the menu structure at:
-
- -> Device Drivers
- -> Network device support (NETDEVICES [=y])
- -> Ethernet driver support
- -> Intel devices
- -> Intel(R) Ethernet Connection E800 Series Support
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-If an issue is identified with the released source code, please email
-the maintainer listed in the MAINTAINERS file.
diff --git a/Documentation/networking/igb.rst b/Documentation/networking/igb.rst
new file mode 100644
index 000000000000..ba16b86d5593
--- /dev/null
+++ b/Documentation/networking/igb.rst
@@ -0,0 +1,193 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Additional Configurations
+- Support
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+http://www.intel.com/support
+
+
+Command Line Parameters
+========================
+If the driver is built as a module, the following optional parameters are used
+by entering them on the command line with the modprobe command using this
+syntax::
+
+ modprobe igb [<option>=<VAL1>,<VAL2>,...]
+
+There needs to be a <VAL#> for each network port in the system supported by
+this driver. The values will be applied to each instance, in function order.
+For example::
+
+ modprobe igb max_vfs=2,4
+
+In this case, there are two network ports supported by igb in the system.
+
+NOTE: A descriptor describes a data buffer and attributes related to the data
+buffer. This information is accessed by the hardware.
+
+max_vfs
+-------
+:Valid Range: 0-7
+
+This parameter adds support for SR-IOV. It causes the driver to spawn up to
+max_vfs worth of virtual functions. If the value is greater than 0 it will
+also force the VMDq parameter to be 1 or more.
+
+The parameters for the driver are referenced by position. Thus, if you have a
+dual port adapter, or more than one adapter in your system, and want N virtual
+functions per port, you must specify a number for each port with each parameter
+separated by a comma. For example::
+
+ modprobe igb max_vfs=4
+
+This will spawn 4 VFs on the first port.
+
+::
+
+ modprobe igb max_vfs=2,4
+
+This will spawn 2 VFs on the first port and 4 VFs on the second port.
+
+NOTE: Caution must be used in loading the driver with these parameters.
+Depending on your system configuration, number of slots, etc., it is impossible
+to predict in all cases where the positions would be on the command line.
+
+NOTE: Neither the device nor the driver control how VFs are mapped into config
+space. Bus layout will vary by operating system. On operating systems that
+support it, you can check sysfs to find the mapping.
+
+NOTE: When either SR-IOV mode or VMDq mode is enabled, hardware VLAN filtering
+and VLAN tag stripping/insertion will remain enabled. Please remove the old
+VLAN filter before the new VLAN filter is added. For example::
+
+ ip link set eth0 vf 0 vlan 100 // set vlan 100 for VF 0
+ ip link set eth0 vf 0 vlan 0 // Delete vlan 100
+ ip link set eth0 vf 0 vlan 200 // set a new vlan 200 for VF 0
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level debug messages displayed in the system logs.
+
+
+Additional Features and Configurations
+======================================
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+ ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ ip link set mtu 9000 dev eth<x>
+ ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file:
+
+- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x>
+- For SLES: /etc/sysconfig/network/<config_file>
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9216. This value coincides
+with the maximum Jumbo Frames size of 9234 bytes.
+
+NOTE: Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+poor performance or loss of link.
+
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+
+https://www.kernel.org/pub/software/network/ethtool/
+
+
+Enabling Wake on LAN* (WoL)
+---------------------------
+WoL is configured through the ethtool* utility.
+
+WoL will be enabled on the system during the next shut down or reboot. For
+this driver version, in order to enable WoL, the igb driver must be loaded
+prior to shutting down or suspending the system.
+
+NOTE: Wake on LAN is only supported on port A of multi-port devices. Also
+Wake On LAN is not supported for the following device:
+- Intel(R) Gigabit VT Quad Port Server Adapter
+
+
+Multiqueue
+----------
+In this mode, a separate MSI-X vector is allocated for each queue and one for
+"other" interrupts such as link status change and errors. All interrupts are
+throttled via interrupt moderation. Interrupt moderation must be used to avoid
+interrupt storms while the driver is processing one interrupt. The moderation
+value should be at least as large as the expected time for the driver to
+process an interrupt. Multiqueue is off by default.
+
+REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not found,
+the system will fallback to MSI or to Legacy interrupts. This driver supports
+receive multiqueue on all kernels that support MSI-X.
+
+NOTE: On some kernels a reboot is required to switch between single queue mode
+and multiqueue mode or vice-versa.
+
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+
+An interrupt is sent to the PF driver notifying it of the spoof attempt. When a
+spoofed packet is detected, the PF driver will send the following message to
+the system log (displayed by the "dmesg" command):
+Spoof event(s) detected on VF(n), where n = the VF that attempted to do the
+spoofing
+
+
+Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool
+------------------------------------------------------------
+You can set a MAC address of a Virtual Function (VF), a default VLAN and the
+rate limit using the IProute2 tool. Download the latest version of the
+IProute2 tool from Sourceforge if your version does not have all the features
+you require.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt
deleted file mode 100644
index f90643ef39c9..000000000000
--- a/Documentation/networking/igb.txt
+++ /dev/null
@@ -1,129 +0,0 @@
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
-
-Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Additional Configurations
-- Support
-
-Identifying Your Adapter
-========================
-
-This driver supports all 82575, 82576 and 82580-based Intel (R) gigabit network
-connections.
-
-For specific information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/go/network/adapter/idguide.htm
-
-Command Line Parameters
-=======================
-
-The default value for each parameter is generally the recommended setting,
-unless otherwise noted.
-
-max_vfs
--------
-Valid Range: 0-7
-Default Value: 0
-
-This parameter adds support for SR-IOV. It causes the driver to spawn up to
-max_vfs worth of virtual function.
-
-Additional Configurations
-=========================
-
- Jumbo Frames
- ------------
- Jumbo Frames support is enabled by changing the MTU to a value larger than
- the default of 1500. Use the ip command to increase the MTU size.
- For example:
-
- ip link set dev eth<x> mtu 9000
-
- This setting is not saved across reboots.
-
- Notes:
-
- - The maximum MTU setting for Jumbo Frames is 9216. This value coincides
- with the maximum Jumbo Frames size of 9234 bytes.
-
- - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
- poor performance or loss of link.
-
- ethtool
- -------
- The driver utilizes the ethtool interface for driver configuration and
- diagnostics, as well as displaying statistical information. The latest
- version of ethtool can be found at:
-
- https://www.kernel.org/pub/software/network/ethtool/
-
- Enabling Wake on LAN* (WoL)
- ---------------------------
- WoL is configured through the ethtool* utility.
-
- For instructions on enabling WoL with ethtool, refer to the ethtool man page.
-
- WoL will be enabled on the system during the next shut down or reboot.
- For this driver version, in order to enable WoL, the igb driver must be
- loaded when shutting down or rebooting the system.
-
- Wake On LAN is only supported on port A of multi-port adapters.
-
- Wake On LAN is not supported for the Intel(R) Gigabit VT Quad Port Server
- Adapter.
-
- Multiqueue
- ----------
- In this mode, a separate MSI-X vector is allocated for each queue and one
- for "other" interrupts such as link status change and errors. All
- interrupts are throttled via interrupt moderation. Interrupt moderation
- must be used to avoid interrupt storms while the driver is processing one
- interrupt. The moderation value should be at least as large as the expected
- time for the driver to process an interrupt. Multiqueue is off by default.
-
- REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not
- found, the system will fallback to MSI or to Legacy interrupts.
-
- MAC and VLAN anti-spoofing feature
- ----------------------------------
- When a malicious driver attempts to send a spoofed packet, it is dropped by
- the hardware and not transmitted. An interrupt is sent to the PF driver
- notifying it of the spoof attempt.
-
- When a spoofed packet is detected the PF driver will send the following
- message to the system log (displayed by the "dmesg" command):
-
- Spoof event(s) detected on VF(n)
-
- Where n=the VF that attempted to do the spoofing.
-
- Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool
- ------------------------------------------------------------
- You can set a MAC address of a Virtual Function (VF), a default VLAN and the
- rate limit using the IProute2 tool. Download the latest version of the
- iproute2 tool from Sourceforge if your version does not have all the
- features you require.
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- www.intel.com/support/
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/igbvf.rst b/Documentation/networking/igbvf.rst
new file mode 100644
index 000000000000..a8a9ffa4f8d3
--- /dev/null
+++ b/Documentation/networking/igbvf.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet
+============================================================
+
+Intel Gigabit Virtual Function Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+- Identifying Your Adapter
+- Additional Configurations
+- Support
+
+This driver supports Intel 82576-based virtual function devices-based virtual
+function devices that can only be activated on kernels that support SR-IOV.
+
+SR-IOV requires the correct platform and OS support.
+
+The guest OS loading this driver must support MSI-X interrupts.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+Driver information can be obtained using ethtool, lspci, and ifconfig.
+Instructions on updating ethtool can be found in the section Additional
+Configurations later in this document.
+
+NOTE: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
+
+
+Identifying Your Adapter
+========================
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+http://www.intel.com/support
+
+
+Additional Features and Configurations
+======================================
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+
+https://www.kernel.org/pub/software/network/ethtool/
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/igbvf.txt b/Documentation/networking/igbvf.txt
deleted file mode 100644
index bd404735fb46..000000000000
--- a/Documentation/networking/igbvf.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
-
-Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Additional Configurations
-- Support
-
-This file describes the igbvf Linux* Base Driver for Intel Network Connection.
-
-The igbvf driver supports 82576-based virtual function devices that can only
-be activated on kernels that support SR-IOV. SR-IOV requires the correct
-platform and OS support.
-
-The igbvf driver requires the igb driver, version 2.0 or later. The igbvf
-driver supports virtual functions generated by the igb driver with a max_vfs
-value of 1 or greater. For more information on the max_vfs parameter refer
-to the README included with the igb driver.
-
-The guest OS loading the igbvf driver must support MSI-X interrupts.
-
-This driver is only supported as a loadable module at this time. Intel is
-not supplying patches against the kernel source to allow for static linking
-of the driver. For questions related to hardware requirements, refer to the
-documentation supplied with your Intel Gigabit adapter. All hardware
-requirements listed apply to use with Linux.
-
-Instructions on updating ethtool can be found in the section "Additional
-Configurations" later in this document.
-
-VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
-
-Identifying Your Adapter
-========================
-
-The igbvf driver supports 82576-based virtual function devices that can only
-be activated on kernels that support SR-IOV.
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/go/network/adapter/idguide.htm
-
-For the latest Intel network drivers for Linux, refer to the following
-website. In the search field, enter your adapter name or type, or use the
-networking link on the left to search for your adapter:
-
- http://downloadcenter.intel.com/scripts-df-external/Support_Intel.aspx
-
-Additional Configurations
-=========================
-
- ethtool
- -------
- The driver utilizes the ethtool interface for driver configuration and
- diagnostics, as well as displaying statistical information. The ethtool
- version 3.0 or later is required for this functionality, although we
- strongly recommend downloading the latest version at:
-
- https://www.kernel.org/pub/software/network/ethtool/
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index fcd710f2cc7a..bd89dae8d578 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -14,6 +14,16 @@ Contents:
dpaa2/index
e100
e1000
+ e1000e
+ fm10k
+ igb
+ igbvf
+ ixgb
+ ixgbe
+ ixgbevf
+ i40e
+ iavf
+ ice
kapi
z8530book
msg_zerocopy
diff --git a/Documentation/networking/ixgb.rst b/Documentation/networking/ixgb.rst
new file mode 100644
index 000000000000..8bd80e27843d
--- /dev/null
+++ b/Documentation/networking/ixgb.rst
@@ -0,0 +1,467 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
+=====================================================================
+
+October 1, 2018
+
+
+Contents
+========
+
+- In This Release
+- Identifying Your Adapter
+- Command Line Parameters
+- Improving Performance
+- Additional Configurations
+- Known Issues/Troubleshooting
+- Support
+
+
+
+In This Release
+===============
+
+This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R)
+Network Connection. This driver includes support for Itanium(R)2-based
+systems.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your 10 Gigabit adapter. All hardware requirements listed apply
+to use with Linux.
+
+The following features are available in this kernel:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
+The driver information previously displayed in the /proc filesystem is not
+supported in this release. Alternatively, you can use ethtool (version 1.6
+or later), lspci, and iproute2 to obtain the same information.
+
+Instructions on updating ethtool can be found in the section "Additional
+Configurations" later in this document.
+
+
+Identifying Your Adapter
+========================
+
+The following Intel network adapters are compatible with the drivers in this
+release:
+
++------------+------------------------------+----------------------------------+
+| Controller | Adapter Name | Physical Layer |
++============+==============================+==================================+
+| 82597EX | Intel(R) PRO/10GbE LR/SR/CX4 | - 10G Base-LR (fiber) |
+| | Server Adapters | - 10G Base-SR (fiber) |
+| | | - 10G Base-CX4 (copper) |
++------------+------------------------------+----------------------------------+
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ https://support.intel.com
+
+
+Command Line Parameters
+=======================
+
+If the driver is built as a module, the following optional parameters are
+used by entering them on the command line with the modprobe command using
+this syntax::
+
+ modprobe ixgb [<option>=<VAL1>,<VAL2>,...]
+
+For example, with two 10GbE PCI adapters, entering::
+
+ modprobe ixgb TxDescriptors=80,128
+
+loads the ixgb driver with 80 TX resources for the first adapter and 128 TX
+resources for the second adapter.
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+Copybreak
+---------
+:Valid Range: 0-XXXX
+:Default Value: 256
+
+ This is the maximum size of packet that is copied to a new buffer on
+ receive.
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+ This parameter adjusts the level of debug messages displayed in the
+ system logs.
+
+FlowControl
+-----------
+:Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+:Default Value: 1 if no EEPROM, otherwise read from EEPROM
+
+ This parameter controls the automatic generation(Tx) and response(Rx) to
+ Ethernet PAUSE frames. There are hardware bugs associated with enabling
+ Tx flow control so beware.
+
+RxDescriptors
+-------------
+:Valid Range: 64-4096
+:Default Value: 1024
+
+ This value is the number of receive descriptors allocated by the driver.
+ Increasing this value allows the driver to buffer more incoming packets.
+ Each descriptor is 16 bytes. A receive buffer is also allocated for
+ each descriptor and can be either 2048, 4056, 8192, or 16384 bytes,
+ depending on the MTU setting. When the MTU size is 1500 or less, the
+ receive buffer size is 2048 bytes. When the MTU is greater than 1500 the
+ receive buffer size will be either 4056, 8192, or 16384 bytes. The
+ maximum MTU size is 16114.
+
+TxDescriptors
+-------------
+:Valid Range: 64-4096
+:Default Value: 256
+
+ This value is the number of transmit descriptors allocated by the driver.
+ Increasing this value allows the driver to queue more transmits. Each
+ descriptor is 16 bytes.
+
+RxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 72
+
+ This value delays the generation of receive interrupts in units of
+ 0.8192 microseconds. Receive interrupt reduction can improve CPU
+ efficiency if properly tuned for specific network traffic. Increasing
+ this value adds extra latency to frame reception and can end up
+ decreasing the throughput of TCP traffic. If the system is reporting
+ dropped receives, this value may be set too high, causing the driver to
+ run out of available receive descriptors.
+
+TxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 32
+
+ This value delays the generation of transmit interrupts in units of
+ 0.8192 microseconds. Transmit interrupt reduction can improve CPU
+ efficiency if properly tuned for specific network traffic. Increasing
+ this value adds extra latency to frame transmission and can end up
+ decreasing the throughput of TCP traffic. If this value is set too high,
+ it will cause the driver to run out of available transmit descriptors.
+
+XsumRX
+------
+:Valid Range: 0-1
+:Default Value: 1
+
+ A value of '1' indicates that the driver should enable IP checksum
+ offload for received packets (both UDP and TCP) to the adapter hardware.
+
+RxFCHighThresh
+--------------
+:Valid Range: 1,536-262,136 (0x600 - 0x3FFF8, 8 byte granularity)
+:Default Value: 196,608 (0x30000)
+
+ Receive Flow control high threshold (when we send a pause frame)
+
+RxFCLowThresh
+-------------
+:Valid Range: 64-262,136 (0x40 - 0x3FFF8, 8 byte granularity)
+:Default Value: 163,840 (0x28000)
+
+ Receive Flow control low threshold (when we send a resume frame)
+
+FCReqTimeout
+------------
+:Valid Range: 1-65535
+:Default Value: 65535
+
+ Flow control request timeout (how long to pause the link partner's tx)
+
+IntDelayEnable
+--------------
+:Value Range: 0,1
+:Default Value: 1
+
+ Interrupt Delay, 0 disables transmit interrupt delay and 1 enables it.
+
+
+Improving Performance
+=====================
+
+With the 10 Gigabit server adapters, the default Linux configuration will
+very likely limit the total available throughput artificially. There is a set
+of configuration changes that, when applied together, will increase the ability
+of Linux to transmit and receive data. The following enhancements were
+originally acquired from settings published at http://www.spec.org/web99/ for
+various submitted results using Linux.
+
+NOTE:
+ These changes are only suggestions, and serve as a starting point for
+ tuning your network performance.
+
+The changes are made in three major ways, listed in order of greatest effect:
+
+- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
+ parameter.
+- Use sysctl to modify /proc parameters (essentially kernel tuning)
+- Use setpci to modify the MMRBC field in PCI-X configuration space to increase
+ transmit burst lengths on the bus.
+
+NOTE:
+ setpci modifies the adapter's configuration registers to allow it to read
+ up to 4k bytes at a time (for transmits). However, for some systems the
+ behavior after modifying this register may be undefined (possibly errors of
+ some kind). A power-cycle, hard reset or explicitly setting the e6 register
+ back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a
+ stable configuration.
+
+- COPY these lines and paste them into ixgb_perf.sh:
+
+::
+
+ #!/bin/bash
+ echo "configuring network performance , edit this file to change the interface
+ or device ID of 10GbE card"
+ # set mmrbc to 4k reads, modify only Intel 10GbE device IDs
+ # replace 1a48 with appropriate 10GbE device's ID installed on the system,
+ # if needed.
+ setpci -d 8086:1a48 e6.b=2e
+ # set the MTU (max transmission unit) - it requires your switch and clients
+ # to change as well.
+ # set the txqueuelen
+ # your ixgb adapter should be loaded as eth1 for this to work, change if needed
+ ip li set dev eth1 mtu 9000 txqueuelen 1000 up
+ # call the sysctl utility to modify /proc/sys entries
+ sysctl -p ./sysctl_ixgb.conf
+
+- COPY these lines and paste them into sysctl_ixgb.conf:
+
+::
+
+ # some of the defaults may be different for your kernel
+ # call this file with sysctl -p <this file>
+ # these are just suggested values that worked well to increase throughput in
+ # several network benchmark tests, your mileage may vary
+
+ ### IPV4 specific settings
+ # turn TCP timestamp support off, default 1, reduces CPU use
+ net.ipv4.tcp_timestamps = 0
+ # turn SACK support off, default on
+ # on systems with a VERY fast bus -> memory interface this is the big gainer
+ net.ipv4.tcp_sack = 0
+ # set min/default/max TCP read buffer, default 4096 87380 174760
+ net.ipv4.tcp_rmem = 10000000 10000000 10000000
+ # set min/pressure/max TCP write buffer, default 4096 16384 131072
+ net.ipv4.tcp_wmem = 10000000 10000000 10000000
+ # set min/pressure/max TCP buffer space, default 31744 32256 32768
+ net.ipv4.tcp_mem = 10000000 10000000 10000000
+
+ ### CORE settings (mostly for socket and UDP effect)
+ # set maximum receive socket buffer size, default 131071
+ net.core.rmem_max = 524287
+ # set maximum send socket buffer size, default 131071
+ net.core.wmem_max = 524287
+ # set default receive socket buffer size, default 65535
+ net.core.rmem_default = 524287
+ # set default send socket buffer size, default 65535
+ net.core.wmem_default = 524287
+ # set maximum amount of option memory buffers, default 10240
+ net.core.optmem_max = 524287
+ # set number of unprocessed input packets before kernel starts dropping them; default 300
+ net.core.netdev_max_backlog = 300000
+
+Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface
+your ixgb driver is using and/or replace '1a48' with appropriate 10GbE device's
+ID installed on the system.
+
+NOTE:
+ Unless these scripts are added to the boot process, these changes will
+ only last only until the next system reboot.
+
+
+Resolving Slow UDP Traffic
+--------------------------
+If your server does not seem to be able to receive UDP traffic as fast as it
+can receive TCP traffic, it could be because Linux, by default, does not set
+the network stack buffers as large as they need to be to support high UDP
+transfer rates. One way to alleviate this problem is to allow more memory to
+be used by the IP stack to store incoming data.
+
+For instance, use the commands::
+
+ sysctl -w net.core.rmem_max=262143
+
+and::
+
+ sysctl -w net.core.rmem_default=262143
+
+to increase the read buffer memory max and default to 262143 (256k - 1) from
+defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables
+will increase the amount of memory used by the network stack for receives, and
+can be increased significantly more if necessary for your application.
+
+
+Additional Configurations
+=========================
+
+Configuring the Driver on Different Distributions
+-------------------------------------------------
+Configuring a network driver to load properly when the system is started is
+distribution dependent. Typically, the configuration process involves adding
+an alias line to /etc/modprobe.conf as well as editing other system startup
+scripts and/or configuration files. Many popular Linux distributions ship
+with tools to make these changes for you. To learn the proper way to
+configure a network device for your system, refer to your distribution
+documentation. If during this process you are asked for the driver or module
+name, the name for the Linux Base Driver for the Intel 10GbE Family of
+Adapters is ixgb.
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set dmesg to eight by entering the following::
+
+ dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
+
+Jumbo Frames
+------------
+The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+enabled by changing the MTU to a value larger than the default of 1500.
+The maximum value for the MTU is 16114. Use the ip command to
+increase the MTU size. For example::
+
+ ip li set dev ethx mtu 9000
+
+The maximum MTU setting for Jumbo Frames is 16114. This value coincides
+with the maximum Jumbo Frames size of 16128.
+
+Ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The ethtool
+version 1.6 or later is required for this functionality.
+
+The latest release of ethtool can be found from
+https://www.kernel.org/pub/software/network/ethtool/
+
+NOTE:
+ The ethtool version 1.6 only supports a limited set of ethtool options.
+ Support for a more complete ethtool feature set can be enabled by
+ upgrading to the latest version.
+
+NAPI
+----
+NAPI (Rx polling mode) is supported in the ixgb driver.
+
+See https://wiki.linuxfoundation.org/networking/napi for more information on
+NAPI.
+
+
+Known Issues/Troubleshooting
+============================
+
+NOTE:
+ After installing the driver, if your Intel Network Connection is not
+ working, verify in the "In This Release" section of the readme that you have
+ installed the correct driver.
+
+Cable Interoperability Issue with Fujitsu XENPAK Module in SmartBits Chassis
+----------------------------------------------------------------------------
+Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4
+Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits
+chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni.
+The CRC errors may be received either by the Intel(R) PRO/10GbE CX4
+Server adapter or the SmartBits. If this situation occurs using a different
+cable assembly may resolve the issue.
+
+Cable Interoperability Issues with HP Procurve 3400cl Switch Port
+-----------------------------------------------------------------
+Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server
+adapter is connected to an HP Procurve 3400cl switch port using short cables
+(1 m or shorter). If this situation occurs, using a longer cable may resolve
+the issue.
+
+Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that
+Are 10 m or longer or where using a Leoni 15 m/24AWG cable assembly. The CRC
+errors may be received either by the CX4 Server adapter or at the switch. If
+this situation occurs, using a different cable assembly may resolve the issue.
+
+Jumbo Frames System Requirement
+-------------------------------
+Memory allocation failures have been observed on Linux systems with 64 MB
+of RAM or less that are running Jumbo Frames. If you are using Jumbo
+Frames, your system may require more than the advertised minimum
+requirement of 64 MB of system memory.
+
+Performance Degradation with Jumbo Frames
+-----------------------------------------
+Degradation in throughput performance may be observed in some Jumbo frames
+environments. If this is observed, increasing the application's socket buffer
+size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
+See the specific application manual and /usr/src/linux*/Documentation/
+networking/ip-sysctl.txt for more details.
+
+Allocating Rx Buffers when Using Jumbo Frames
+---------------------------------------------
+Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if
+the available memory is heavily fragmented. This issue may be seen with PCI-X
+adapters or with packet split disabled. This can be reduced or eliminated
+by changing the amount of available memory for receive buffer allocation, by
+increasing /proc/sys/vm/min_free_kbytes.
+
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
+Due to the default ARP behavior on Linux, it is not possible to have
+one system on two IP networks in the same Ethernet broadcast domain
+(non-partitioned switch) behave as expected. All Ethernet interfaces
+will respond to IP traffic for any IP address assigned to the system.
+This results in unbalanced receive traffic.
+
+If you have multiple interfaces in a server, do either of the following:
+
+ - Turn on ARP filtering by entering::
+
+ echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+ - Install the interfaces in separate broadcast domains - either in
+ different switches or in a switch partitioned to VLANs.
+
+UDP Stress Test Dropped Packet Issue
+--------------------------------------
+Under small packets UDP stress test with 10GbE driver, the Linux system
+may drop UDP packets due to the fullness of socket buffers. You may want
+to change the driver's Flow Control variables to the minimum value for
+controlling packet reception.
+
+Tx Hangs Possible Under Stress
+------------------------------
+Under stress conditions, if TX hangs occur, turning off TSO
+"ethtool -K eth0 tso off" may resolve the problem.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt
deleted file mode 100644
index 09f71d71920a..000000000000
--- a/Documentation/networking/ixgb.txt
+++ /dev/null
@@ -1,433 +0,0 @@
-Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
-=====================================================================
-
-March 14, 2011
-
-
-Contents
-========
-
-- In This Release
-- Identifying Your Adapter
-- Building and Installation
-- Command Line Parameters
-- Improving Performance
-- Additional Configurations
-- Known Issues/Troubleshooting
-- Support
-
-
-
-In This Release
-===============
-
-This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R)
-Network Connection. This driver includes support for Itanium(R)2-based
-systems.
-
-For questions related to hardware requirements, refer to the documentation
-supplied with your 10 Gigabit adapter. All hardware requirements listed apply
-to use with Linux.
-
-The following features are available in this kernel:
- - Native VLANs
- - Channel Bonding (teaming)
- - SNMP
-
-Channel Bonding documentation can be found in the Linux kernel source:
-/Documentation/networking/bonding.txt
-
-The driver information previously displayed in the /proc filesystem is not
-supported in this release. Alternatively, you can use ethtool (version 1.6
-or later), lspci, and iproute2 to obtain the same information.
-
-Instructions on updating ethtool can be found in the section "Additional
-Configurations" later in this document.
-
-
-Identifying Your Adapter
-========================
-
-The following Intel network adapters are compatible with the drivers in this
-release:
-
-Controller Adapter Name Physical Layer
----------- ------------ --------------
-82597EX Intel(R) PRO/10GbE LR/SR/CX4 10G Base-LR (1310 nm optical fiber)
- Server Adapters 10G Base-SR (850 nm optical fiber)
- 10G Base-CX4(twin-axial copper cabling)
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/network/sb/CS-012904.htm
-
-
-Building and Installation
-=========================
-
-select m for "Intel(R) PRO/10GbE support" located at:
- Location:
- -> Device Drivers
- -> Network device support (NETDEVICES [=y])
- -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
-1. make modules && make modules_install
-
-2. Load the module:
-
-    modprobe ixgb <parameter>=<value>
-
- The insmod command can be used if the full
- path to the driver module is specified. For example:
-
- insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgb/ixgb.ko
-
- With 2.6 based kernels also make sure that older ixgb drivers are
- removed from the kernel, before loading the new module:
-
- rmmod ixgb; modprobe ixgb
-
-3. Assign an IP address to the interface by entering the following, where
- x is the interface number:
-
- ip addr add ethx <IP_address>
-
-4. Verify that the interface works. Enter the following, where <IP_address>
- is the IP address for another machine on the same subnet as the interface
- that is being tested:
-
- ping <IP_address>
-
-
-Command Line Parameters
-=======================
-
-If the driver is built as a module, the following optional parameters are
-used by entering them on the command line with the modprobe command using
-this syntax:
-
- modprobe ixgb [<option>=<VAL1>,<VAL2>,...]
-
-For example, with two 10GbE PCI adapters, entering:
-
- modprobe ixgb TxDescriptors=80,128
-
-loads the ixgb driver with 80 TX resources for the first adapter and 128 TX
-resources for the second adapter.
-
-The default value for each parameter is generally the recommended setting,
-unless otherwise noted.
-
-FlowControl
-Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
-Default: Read from the EEPROM
- If EEPROM is not detected, default is 1
- This parameter controls the automatic generation(Tx) and response(Rx) to
- Ethernet PAUSE frames. There are hardware bugs associated with enabling
- Tx flow control so beware.
-
-RxDescriptors
-Valid Range: 64-512
-Default Value: 512
- This value is the number of receive descriptors allocated by the driver.
- Increasing this value allows the driver to buffer more incoming packets.
- Each descriptor is 16 bytes. A receive buffer is also allocated for
- each descriptor and can be either 2048, 4056, 8192, or 16384 bytes,
- depending on the MTU setting. When the MTU size is 1500 or less, the
- receive buffer size is 2048 bytes. When the MTU is greater than 1500 the
- receive buffer size will be either 4056, 8192, or 16384 bytes. The
- maximum MTU size is 16114.
-
-RxIntDelay
-Valid Range: 0-65535 (0=off)
-Default Value: 72
- This value delays the generation of receive interrupts in units of
- 0.8192 microseconds. Receive interrupt reduction can improve CPU
- efficiency if properly tuned for specific network traffic. Increasing
- this value adds extra latency to frame reception and can end up
- decreasing the throughput of TCP traffic. If the system is reporting
- dropped receives, this value may be set too high, causing the driver to
- run out of available receive descriptors.
-
-TxDescriptors
-Valid Range: 64-4096
-Default Value: 256
- This value is the number of transmit descriptors allocated by the driver.
- Increasing this value allows the driver to queue more transmits. Each
- descriptor is 16 bytes.
-
-XsumRX
-Valid Range: 0-1
-Default Value: 1
- A value of '1' indicates that the driver should enable IP checksum
- offload for received packets (both UDP and TCP) to the adapter hardware.
-
-
-Improving Performance
-=====================
-
-With the 10 Gigabit server adapters, the default Linux configuration will
-very likely limit the total available throughput artificially. There is a set
-of configuration changes that, when applied together, will increase the ability
-of Linux to transmit and receive data. The following enhancements were
-originally acquired from settings published at http://www.spec.org/web99/ for
-various submitted results using Linux.
-
-NOTE: These changes are only suggestions, and serve as a starting point for
- tuning your network performance.
-
-The changes are made in three major ways, listed in order of greatest effect:
-- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
- parameter.
-- Use sysctl to modify /proc parameters (essentially kernel tuning)
-- Use setpci to modify the MMRBC field in PCI-X configuration space to increase
- transmit burst lengths on the bus.
-
-NOTE: setpci modifies the adapter's configuration registers to allow it to read
-up to 4k bytes at a time (for transmits). However, for some systems the
-behavior after modifying this register may be undefined (possibly errors of
-some kind). A power-cycle, hard reset or explicitly setting the e6 register
-back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a
-stable configuration.
-
-- COPY these lines and paste them into ixgb_perf.sh:
-#!/bin/bash
-echo "configuring network performance , edit this file to change the interface
-or device ID of 10GbE card"
-# set mmrbc to 4k reads, modify only Intel 10GbE device IDs
-# replace 1a48 with appropriate 10GbE device's ID installed on the system,
-# if needed.
-setpci -d 8086:1a48 e6.b=2e
-# set the MTU (max transmission unit) - it requires your switch and clients
-# to change as well.
-# set the txqueuelen
-# your ixgb adapter should be loaded as eth1 for this to work, change if needed
-ip li set dev eth1 mtu 9000 txqueuelen 1000 up
-# call the sysctl utility to modify /proc/sys entries
-sysctl -p ./sysctl_ixgb.conf
-- END ixgb_perf.sh
-
-- COPY these lines and paste them into sysctl_ixgb.conf:
-# some of the defaults may be different for your kernel
-# call this file with sysctl -p <this file>
-# these are just suggested values that worked well to increase throughput in
-# several network benchmark tests, your mileage may vary
-
-### IPV4 specific settings
-# turn TCP timestamp support off, default 1, reduces CPU use
-net.ipv4.tcp_timestamps = 0
-# turn SACK support off, default on
-# on systems with a VERY fast bus -> memory interface this is the big gainer
-net.ipv4.tcp_sack = 0
-# set min/default/max TCP read buffer, default 4096 87380 174760
-net.ipv4.tcp_rmem = 10000000 10000000 10000000
-# set min/pressure/max TCP write buffer, default 4096 16384 131072
-net.ipv4.tcp_wmem = 10000000 10000000 10000000
-# set min/pressure/max TCP buffer space, default 31744 32256 32768
-net.ipv4.tcp_mem = 10000000 10000000 10000000
-
-### CORE settings (mostly for socket and UDP effect)
-# set maximum receive socket buffer size, default 131071
-net.core.rmem_max = 524287
-# set maximum send socket buffer size, default 131071
-net.core.wmem_max = 524287
-# set default receive socket buffer size, default 65535
-net.core.rmem_default = 524287
-# set default send socket buffer size, default 65535
-net.core.wmem_default = 524287
-# set maximum amount of option memory buffers, default 10240
-net.core.optmem_max = 524287
-# set number of unprocessed input packets before kernel starts dropping them; default 300
-net.core.netdev_max_backlog = 300000
-- END sysctl_ixgb.conf
-
-Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface
-your ixgb driver is using and/or replace '1a48' with appropriate 10GbE device's
-ID installed on the system.
-
-NOTE: Unless these scripts are added to the boot process, these changes will
- only last only until the next system reboot.
-
-
-Resolving Slow UDP Traffic
---------------------------
-If your server does not seem to be able to receive UDP traffic as fast as it
-can receive TCP traffic, it could be because Linux, by default, does not set
-the network stack buffers as large as they need to be to support high UDP
-transfer rates. One way to alleviate this problem is to allow more memory to
-be used by the IP stack to store incoming data.
-
-For instance, use the commands:
- sysctl -w net.core.rmem_max=262143
-and
- sysctl -w net.core.rmem_default=262143
-to increase the read buffer memory max and default to 262143 (256k - 1) from
-defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables
-will increase the amount of memory used by the network stack for receives, and
-can be increased significantly more if necessary for your application.
-
-
-Additional Configurations
-=========================
-
- Configuring the Driver on Different Distributions
- -------------------------------------------------
- Configuring a network driver to load properly when the system is started is
- distribution dependent. Typically, the configuration process involves adding
- an alias line to /etc/modprobe.conf as well as editing other system startup
- scripts and/or configuration files. Many popular Linux distributions ship
- with tools to make these changes for you. To learn the proper way to
- configure a network device for your system, refer to your distribution
- documentation. If during this process you are asked for the driver or module
- name, the name for the Linux Base Driver for the Intel 10GbE Family of
- Adapters is ixgb.
-
- Viewing Link Messages
- ---------------------
- Link messages will not be displayed to the console if the distribution is
- restricting system messages. In order to see network driver link messages on
- your console, set dmesg to eight by entering the following:
-
- dmesg -n 8
-
- NOTE: This setting is not saved across reboots.
-
-
- Jumbo Frames
- ------------
- The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
- enabled by changing the MTU to a value larger than the default of 1500.
- The maximum value for the MTU is 16114. Use the ip command to
- increase the MTU size. For example:
-
- ip li set dev ethx mtu 9000
-
- The maximum MTU setting for Jumbo Frames is 16114. This value coincides
- with the maximum Jumbo Frames size of 16128.
-
-
- ethtool
- -------
- The driver utilizes the ethtool interface for driver configuration and
- diagnostics, as well as displaying statistical information. The ethtool
- version 1.6 or later is required for this functionality.
-
- The latest release of ethtool can be found from
- https://www.kernel.org/pub/software/network/ethtool/
-
- NOTE: The ethtool version 1.6 only supports a limited set of ethtool options.
- Support for a more complete ethtool feature set can be enabled by
- upgrading to the latest version.
-
-
- NAPI
- ----
-
- NAPI (Rx polling mode) is supported in the ixgb driver. NAPI is enabled
- or disabled based on the configuration of the kernel. see CONFIG_IXGB_NAPI
-
- See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
-
-
-Known Issues/Troubleshooting
-============================
-
- NOTE: After installing the driver, if your Intel Network Connection is not
- working, verify in the "In This Release" section of the readme that you have
- installed the correct driver.
-
- Intel(R) PRO/10GbE CX4 Server Adapter Cable Interoperability Issue with
- Fujitsu XENPAK Module in SmartBits Chassis
- ---------------------------------------------------------------------
- Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4
- Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits
- chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni.
- The CRC errors may be received either by the Intel(R) PRO/10GbE CX4
- Server adapter or the SmartBits. If this situation occurs using a different
- cable assembly may resolve the issue.
-
- CX4 Server Adapter Cable Interoperability Issues with HP Procurve 3400cl
- Switch Port
- ------------------------------------------------------------------------
- Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server
- adapter is connected to an HP Procurve 3400cl switch port using short cables
- (1 m or shorter). If this situation occurs, using a longer cable may resolve
- the issue.
-
- Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that
- Are 10 m or longer or where using a Leoni 15 m/24AWG cable assembly. The CRC
- errors may be received either by the CX4 Server adapter or at the switch. If
- this situation occurs, using a different cable assembly may resolve the issue.
-
-
- Jumbo Frames System Requirement
- -------------------------------
- Memory allocation failures have been observed on Linux systems with 64 MB
- of RAM or less that are running Jumbo Frames. If you are using Jumbo
- Frames, your system may require more than the advertised minimum
- requirement of 64 MB of system memory.
-
-
- Performance Degradation with Jumbo Frames
- -----------------------------------------
- Degradation in throughput performance may be observed in some Jumbo frames
- environments. If this is observed, increasing the application's socket buffer
- size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
- See the specific application manual and /usr/src/linux*/Documentation/
- networking/ip-sysctl.txt for more details.
-
-
- Allocating Rx Buffers when Using Jumbo Frames
- ---------------------------------------------
- Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if
- the available memory is heavily fragmented. This issue may be seen with PCI-X
- adapters or with packet split disabled. This can be reduced or eliminated
- by changing the amount of available memory for receive buffer allocation, by
- increasing /proc/sys/vm/min_free_kbytes.
-
-
- Multiple Interfaces on Same Ethernet Broadcast Network
- ------------------------------------------------------
- Due to the default ARP behavior on Linux, it is not possible to have
- one system on two IP networks in the same Ethernet broadcast domain
- (non-partitioned switch) behave as expected. All Ethernet interfaces
- will respond to IP traffic for any IP address assigned to the system.
- This results in unbalanced receive traffic.
-
- If you have multiple interfaces in a server, do either of the following:
-
- - Turn on ARP filtering by entering:
- echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
-
- - Install the interfaces in separate broadcast domains - either in
- different switches or in a switch partitioned to VLANs.
-
-
- UDP Stress Test Dropped Packet Issue
- --------------------------------------
- Under small packets UDP stress test with 10GbE driver, the Linux system
- may drop UDP packets due to the fullness of socket buffers. You may want
- to change the driver's Flow Control variables to the minimum value for
- controlling packet reception.
-
-
- Tx Hangs Possible Under Stress
- ------------------------------
- Under stress conditions, if TX hangs occur, turning off TSO
- "ethtool -K eth0 tso off" may resolve the problem.
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgbe.rst b/Documentation/networking/ixgbe.rst
new file mode 100644
index 000000000000..725fc697fd8f
--- /dev/null
+++ b/Documentation/networking/ixgbe.rst
@@ -0,0 +1,527 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters
+=============================================================================
+
+Intel 10 Gigabit Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Additional Configurations
+- Known Issues
+- Support
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller 82598
+ * Intel(R) Ethernet Controller 82599
+ * Intel(R) Ethernet Controller X520
+ * Intel(R) Ethernet Controller X540
+ * Intel(R) Ethernet Controller x550
+ * Intel(R) Ethernet Controller X552
+ * Intel(R) Ethernet Controller X553
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+SFP+ Devices with Pluggable Optics
+----------------------------------
+
+82599-BASED ADAPTERS
+~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- If your 82599-based Intel(R) Network Adapter came with Intel optics or is an
+Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel optics
+and/or the direct attach cables listed below.
+- When 82599-based SFP+ devices are connected back to back, they should be set
+to the same Speed setting via ethtool. Results may vary if you mix speed
+settings.
+
++---------------+---------------------------------------+------------------+
+| Supplier | Type | Part Numbers |
++===============+=======================================+==================+
+| SR Modules |
++---------------+---------------------------------------+------------------+
+| Intel | DUAL RATE 1G/10G SFP+ SR (bailed) | FTLX8571D3BCV-IT |
++---------------+---------------------------------------+------------------+
+| Intel | DUAL RATE 1G/10G SFP+ SR (bailed) | AFBR-703SDZ-IN2 |
++---------------+---------------------------------------+------------------+
+| Intel | DUAL RATE 1G/10G SFP+ SR (bailed) | AFBR-703SDDZ-IN1 |
++---------------+---------------------------------------+------------------+
+| LR Modules |
++---------------+---------------------------------------+------------------+
+| Intel | DUAL RATE 1G/10G SFP+ LR (bailed) | FTLX1471D3BCV-IT |
++---------------+---------------------------------------+------------------+
+| Intel | DUAL RATE 1G/10G SFP+ LR (bailed) | AFCT-701SDZ-IN2 |
++---------------+---------------------------------------+------------------+
+| Intel | DUAL RATE 1G/10G SFP+ LR (bailed) | AFCT-701SDDZ-IN1 |
++---------------+---------------------------------------+------------------+
+
+The following is a list of 3rd party SFP+ modules that have received some
+testing. Not all modules are applicable to all devices.
+
++---------------+---------------------------------------+------------------+
+| Supplier | Type | Part Numbers |
++===============+=======================================+==================+
+| Finisar | SFP+ SR bailed, 10g single rate | FTLX8571D3BCL |
++---------------+---------------------------------------+------------------+
+| Avago | SFP+ SR bailed, 10g single rate | AFBR-700SDZ |
++---------------+---------------------------------------+------------------+
+| Finisar | SFP+ LR bailed, 10g single rate | FTLX1471D3BCL |
++---------------+---------------------------------------+------------------+
+| Finisar | DUAL RATE 1G/10G SFP+ SR (No Bail) | FTLX8571D3QCV-IT |
++---------------+---------------------------------------+------------------+
+| Avago | DUAL RATE 1G/10G SFP+ SR (No Bail) | AFBR-703SDZ-IN1 |
++---------------+---------------------------------------+------------------+
+| Finisar | DUAL RATE 1G/10G SFP+ LR (No Bail) | FTLX1471D3QCV-IT |
++---------------+---------------------------------------+------------------+
+| Avago | DUAL RATE 1G/10G SFP+ LR (No Bail) | AFCT-701SDZ-IN1 |
++---------------+---------------------------------------+------------------+
+| Finisar | 1000BASE-T SFP | FCLF8522P2BTL |
++---------------+---------------------------------------+------------------+
+| Avago | 1000BASE-T | ABCU-5710RZ |
++---------------+---------------------------------------+------------------+
+| HP | 1000BASE-SX SFP | 453153-001 |
++---------------+---------------------------------------+------------------+
+
+82599-based adapters support all passive and active limiting direct attach
+cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
+
+Laser turns off for SFP+ when ifconfig ethX down
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"ifconfig ethX down" turns off the laser for 82599-based SFP+ fiber adapters.
+"ifconfig ethX up" turns on the laser.
+Alternatively, you can use "ip link set [down/up] dev ethX" to turn the
+laser off and on.
+
+
+82599-based QSFP+ Adapters
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- If your 82599-based Intel(R) Network Adapter came with Intel optics, it only
+supports Intel optics.
+- 82599-based QSFP+ adapters only support 4x10 Gbps connections. 1x40 Gbps
+connections are not supported. QSFP+ link partners must be configured for
+4x10 Gbps.
+- 82599-based QSFP+ adapters do not support automatic link speed detection.
+The link speed must be configured to either 10 Gbps or 1 Gbps to match the link
+partners speed capabilities. Incorrect speed configurations will result in
+failure to link.
+- Intel(R) Ethernet Converged Network Adapter X520-Q1 only supports the optics
+and direct attach cables listed below.
+
++---------------+---------------------------------------+------------------+
+| Supplier | Type | Part Numbers |
++===============+=======================================+==================+
+| Intel | DUAL RATE 1G/10G QSFP+ SRL (bailed) | E10GQSFPSR |
++---------------+---------------------------------------+------------------+
+
+82599-based QSFP+ adapters support all passive and active limiting QSFP+
+direct attach cables that comply with SFF-8436 v4.1 specifications.
+
+82598-BASED ADAPTERS
+~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- Intel(r) Ethernet Network Adapters that support removable optical modules
+only support their original module type (for example, the Intel(R) 10 Gigabit
+SR Dual Port Express Module only supports SR optical modules). If you plug in
+a different type of module, the driver will not load.
+- Hot Swapping/hot plugging optical modules is not supported.
+- Only single speed, 10 gigabit modules are supported.
+- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module
+types are not supported. Please see your system documentation for details.
+
+The following is a list of SFP+ modules and direct attach cables that have
+received some testing. Not all modules are applicable to all devices.
+
++---------------+---------------------------------------+------------------+
+| Supplier | Type | Part Numbers |
++===============+=======================================+==================+
+| Finisar | SFP+ SR bailed, 10g single rate | FTLX8571D3BCL |
++---------------+---------------------------------------+------------------+
+| Avago | SFP+ SR bailed, 10g single rate | AFBR-700SDZ |
++---------------+---------------------------------------+------------------+
+| Finisar | SFP+ LR bailed, 10g single rate | FTLX1471D3BCL |
++---------------+---------------------------------------+------------------+
+
+82598-based adapters support all passive direct attach cables that comply with
+SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach cables
+are not supported.
+
+Third party optic modules and cables referred to above are listed only for the
+purpose of highlighting third party specifications and potential
+compatibility, and are not recommendations or endorsements or sponsorship of
+any third party's product by Intel. Intel is not endorsing or promoting
+products made by any third party and the third party reference is provided
+only to share information regarding certain optic modules and cables with the
+above specifications. There may be other manufacturers or suppliers, producing
+or supplying optic modules and cables with similar or matching descriptions.
+Customers must use their own discretion and diligence to purchase optic
+modules and cables from any third party of their choice. Customers are solely
+responsible for assessing the suitability of the product and/or devices and
+for the selection of the vendor for purchasing any product. THE OPTIC MODULES
+AND CABLES REFERRED TO ABOVE ARE NOT WARRANTED OR SUPPORTED BY INTEL. INTEL
+ASSUMES NO LIABILITY WHATSOEVER, AND INTEL DISCLAIMS ANY EXPRESS OR IMPLIED
+WARRANTY, RELATING TO SALE AND/OR USE OF SUCH THIRD PARTY PRODUCTS OR
+SELECTION OF VENDOR BY CUSTOMERS.
+
+Command Line Parameters
+=======================
+
+max_vfs
+-------
+:Valid Range: 1-63
+
+This parameter adds support for SR-IOV. It causes the driver to spawn up to
+max_vfs worth of virtual functions.
+If the value is greater than 0 it will also force the VMDq parameter to be 1 or
+more.
+
+NOTE: This parameter is only used on kernel 3.7.x and below. On kernel 3.8.x
+and above, use sysfs to enable VFs. Also, for Red Hat distributions, this
+parameter is only used on version 6.6 and older. For version 6.7 and newer, use
+sysfs. For example::
+
+ #echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs // enable VFs
+ #echo 0 > /sys/class/net/$dev/device/sriov_numvfs //disable VFs
+
+The parameters for the driver are referenced by position. Thus, if you have a
+dual port adapter, or more than one adapter in your system, and want N virtual
+functions per port, you must specify a number for each port with each parameter
+separated by a comma. For example::
+
+ modprobe ixgbe max_vfs=4
+
+This will spawn 4 VFs on the first port.
+
+::
+
+ modprobe ixgbe max_vfs=2,4
+
+This will spawn 2 VFs on the first port and 4 VFs on the second port.
+
+NOTE: Caution must be used in loading the driver with these parameters.
+Depending on your system configuration, number of slots, etc., it is impossible
+to predict in all cases where the positions would be on the command line.
+
+NOTE: Neither the device nor the driver control how VFs are mapped into config
+space. Bus layout will vary by operating system. On operating systems that
+support it, you can check sysfs to find the mapping.
+
+NOTE: When either SR-IOV mode or VMDq mode is enabled, hardware VLAN filtering
+and VLAN tag stripping/insertion will remain enabled. Please remove the old
+VLAN filter before the new VLAN filter is added. For example,
+
+::
+
+ ip link set eth0 vf 0 vlan 100 // set VLAN 100 for VF 0
+ ip link set eth0 vf 0 vlan 0 // Delete VLAN 100
+ ip link set eth0 vf 0 vlan 200 // set a new VLAN 200 for VF 0
+
+With kernel 3.6, the driver supports the simultaneous usage of max_vfs and DCB
+features, subject to the constraints described below. Prior to kernel 3.6, the
+driver did not support the simultaneous operation of max_vfs greater than 0 and
+the DCB features (multiple traffic classes utilizing Priority Flow Control and
+Extended Transmission Selection).
+
+When DCB is enabled, network traffic is transmitted and received through
+multiple traffic classes (packet buffers in the NIC). The traffic is associated
+with a specific class based on priority, which has a value of 0 through 7 used
+in the VLAN tag. When SR-IOV is not enabled, each traffic class is associated
+with a set of receive/transmit descriptor queue pairs. The number of queue
+pairs for a given traffic class depends on the hardware configuration. When
+SR-IOV is enabled, the descriptor queue pairs are grouped into pools. The
+Physical Function (PF) and each Virtual Function (VF) is allocated a pool of
+receive/transmit descriptor queue pairs. When multiple traffic classes are
+configured (for example, DCB is enabled), each pool contains a queue pair from
+each traffic class. When a single traffic class is configured in the hardware,
+the pools contain multiple queue pairs from the single traffic class.
+
+The number of VFs that can be allocated depends on the number of traffic
+classes that can be enabled. The configurable number of traffic classes for
+each enabled VF is as follows:
+0 - 15 VFs = Up to 8 traffic classes, depending on device support
+16 - 31 VFs = Up to 4 traffic classes
+32 - 63 VFs = 1 traffic class
+
+When VFs are configured, the PF is allocated one pool as well. The PF supports
+the DCB features with the constraint that each traffic class will only use a
+single queue pair. When zero VFs are configured, the PF can support multiple
+queue pairs per traffic class.
+
+allow_unsupported_sfp
+---------------------
+:Valid Range: 0,1
+:Default Value: 0 (disabled)
+
+This parameter allows unsupported and untested SFP+ modules on 82599-based
+adapters, as long as the type of module is known to the driver.
+
+debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system
+logs.
+
+
+Additional Features and Configurations
+======================================
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for ixgbe. When transmit is enabled,
+pause frames are generated when the receive packet buffer crosses a predefined
+threshold. When receive is enabled, the transmit unit will halt for the time
+delay specified when a pause frame is received.
+
+NOTE: You must have a flow control capable link partner.
+
+Flow Control is enabled by default.
+
+Use ethtool to change the flow control settings. To enable or disable Rx or
+Tx Flow Control::
+
+ ethtool -A eth? rx <on|off> tx <on|off>
+
+Note: This command only enables or disables Flow Control if auto-negotiation is
+disabled. If auto-negotiation is enabled, this command changes the parameters
+used for auto-negotiation with the link partner.
+
+To enable or disable auto-negotiation::
+
+ ethtool -s eth? autoneg <on|off>
+
+Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending
+on your device, you may not be able to change the auto-negotiation setting.
+
+NOTE: For 82598 backplane cards entering 1 gigabit mode, flow control default
+behavior is changed to off. Flow control in 1 gigabit mode on these devices can
+lead to transmit hangs.
+
+Intel(R) Ethernet Flow Director
+-------------------------------
+The Intel Ethernet Flow Director performs the following tasks:
+
+- Directs receive packets according to their flows to different queues.
+- Enables tight control on routing a flow in the platform.
+- Matches flows and CPU cores for flow affinity.
+- Supports multiple parameters for flexible flow classification and load
+ balancing (in SFP mode only).
+
+NOTE: Intel Ethernet Flow Director masking works in the opposite manner from
+subnet masking. In the following command::
+
+ #ethtool -N eth11 flow-type ip4 src-ip 172.4.1.2 m 255.0.0.0 dst-ip \
+ 172.21.1.1 m 255.128.0.0 action 31
+
+The src-ip value that is written to the filter will be 0.4.1.2, not 172.0.0.0
+as might be expected. Similarly, the dst-ip value written to the filter will be
+0.21.1.1, not 172.0.0.0.
+
+To enable or disable the Intel Ethernet Flow Director::
+
+ # ethtool -K ethX ntuple <on|off>
+
+When disabling ntuple filters, all the user programmed filters are flushed from
+the driver cache and hardware. All needed filters must be re-added when ntuple
+is re-enabled.
+
+To add a filter that directs packet to queue 2, use -U or -N switch::
+
+ # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \
+ 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1]
+
+To see the list of filters currently present::
+
+ # ethtool <-u|-n> ethX
+
+Sideband Perfect Filters
+------------------------
+Sideband Perfect Filters are used to direct traffic that matches specified
+characteristics. They are enabled through ethtool's ntuple interface. To add a
+new filter use the following command::
+
+ ethtool -U <device> flow-type <type> src-ip <ip> dst-ip <ip> src-port <port> \
+ dst-port <port> action <queue>
+
+Where:
+ <device> - the ethernet device to program
+ <type> - can be ip4, tcp4, udp4, or sctp4
+ <ip> - the IP address to match on
+ <port> - the port number to match on
+ <queue> - the queue to direct traffic towards (-1 discards the matched traffic)
+
+Use the following command to delete a filter::
+
+ ethtool -U <device> delete <N>
+
+Where <N> is the filter id displayed when printing all the active filters, and
+may also have been specified using "loc <N>" when adding the filter.
+
+The following example matches TCP traffic sent from 192.168.0.1, port 5300,
+directed to 192.168.0.5, port 80, and sends it to queue 7::
+
+ ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 \
+ src-port 5300 dst-port 80 action 7
+
+For each flow-type, the programmed filters must all have the same matching
+input set. For example, issuing the following two commands is acceptable::
+
+ ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10
+
+Issuing the next two commands, however, is not acceptable, since the first
+specifies src-ip and the second specifies dst-ip::
+
+ ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+ ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10
+
+The second command will fail with an error. You may program multiple filters
+with the same fields, using different values, but, on one device, you may not
+program two TCP4 filters with different matching fields.
+
+Matching on a sub-portion of a field is not supported by the ixgbe driver, thus
+partial mask fields are not supported.
+
+To create filters that direct traffic to a specific Virtual Function, use the
+"user-def" parameter. Specify the user-def as a 64 bit value, where the lower 32
+bits represents the queue number, while the next 8 bits represent which VF.
+Note that 0 is the PF, so the VF identifier is offset by 1. For example::
+
+ ... user-def 0x800000002 ...
+
+specifies to direct traffic to Virtual Function 7 (8 minus 1) into queue 2 of
+that VF.
+
+Note that these filters will not break internal routing rules, and will not
+route traffic that otherwise would not have been sent to the specified Virtual
+Function.
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+ ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+ ip link set mtu 9000 dev eth<x>
+ ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file::
+
+ /etc/sysconfig/network-scripts/ifcfg-eth<x> // for RHEL
+ /etc/sysconfig/network/<config_file> // for SLES
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9710. This value coincides
+with the maximum Jumbo Frames size of 9728 bytes.
+
+NOTE: This driver will attempt to use multiple page sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+NOTE: For 82599-based network connections, if you are enabling jumbo frames in
+a virtual function (VF), jumbo frames must first be enabled in the physical
+function (PF). The VF MTU setting cannot be larger than the PF MTU.
+
+Generic Receive Offload, aka GRO
+--------------------------------
+The driver supports the in-kernel software implementation of GRO. GRO has
+shown that by coalescing Rx traffic into larger chunks of data, CPU
+utilization can be significantly reduced when under large Rx load. GRO is an
+evolution of the previously-used LRO interface. GRO is able to coalesce
+other protocols besides TCP. It's also safe to use with configurations that
+are problematic for LRO, namely bridging and iSCSI.
+
+Data Center Bridging (DCB)
+--------------------------
+NOTE:
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+DCB is a configuration Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+Adapter firmware implements LLDP and DCBX protocol agents as per 802.1AB and
+802.1Qaz respectively. The firmware based DCBX agent runs in willing mode only
+and can accept settings from a DCBX capable peer. Software configuration of
+DCBX parameters via dcbtool/lldptool are not supported.
+
+The ixgbe driver implements the DCB netlink interface layer to allow user-space
+to communicate with the driver and query DCB configuration for the port.
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://www.kernel.org/pub/software/network/ethtool/
+
+FCoE
+----
+The ixgbe driver supports Fiber Channel over Ethernet (FCoE) and Data Center
+Bridging (DCB). This code has no default effect on the regular driver
+operation. Configuring DCB and FCoE is outside the scope of this README. Refer
+to http://www.open-fcoe.org/ for FCoE project information and contact
+ixgbe-eedc@lists.sourceforge.net for DCB information.
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+
+An interrupt is sent to the PF driver notifying it of the spoof attempt. When a
+spoofed packet is detected, the PF driver will send the following message to
+the system log (displayed by the "dmesg" command)::
+
+ ixgbe ethX: ixgbe_spoof_check: n spoofed packets detected
+
+where "x" is the PF interface number; and "n" is number of spoofed packets.
+NOTE: This feature can be disabled for a specific Virtual Function (VF)::
+
+ ip link set <pf dev> vf <vf id> spoofchk {off|on}
+
+
+Known Issues/Troubleshooting
+============================
+
+Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS
+-----------------------------------------------------------------------
+Linux KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM.
+This includes traditional PCIe devices, as well as SR-IOV-capable devices based
+on the Intel Ethernet Controller XL710.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt
deleted file mode 100644
index 687835415707..000000000000
--- a/Documentation/networking/ixgbe.txt
+++ /dev/null
@@ -1,349 +0,0 @@
-Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Family of
-Adapters
-=============================================================================
-
-Intel 10 Gigabit Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Additional Configurations
-- Performance Tuning
-- Known Issues
-- Support
-
-Identifying Your Adapter
-========================
-
-The driver in this release is compatible with 82598, 82599 and X540-based
-Intel Network Connections.
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/network/sb/CS-012904.htm
-
-SFP+ Devices with Pluggable Optics
-----------------------------------
-
-82599-BASED ADAPTERS
-
-NOTES: If your 82599-based Intel(R) Network Adapter came with Intel optics, or
-is an Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel
-optics and/or the direct attach cables listed below.
-
-When 82599-based SFP+ devices are connected back to back, they should be set to
-the same Speed setting via ethtool. Results may vary if you mix speed settings.
-82598-based adapters support all passive direct attach cables that comply
-with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach
-cables are not supported.
-
-Supplier Type Part Numbers
-
-SR Modules
-Intel DUAL RATE 1G/10G SFP+ SR (bailed) FTLX8571D3BCV-IT
-Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDDZ-IN1
-Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDZ-IN2
-LR Modules
-Intel DUAL RATE 1G/10G SFP+ LR (bailed) FTLX1471D3BCV-IT
-Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDDZ-IN1
-Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDZ-IN2
-
-The following is a list of 3rd party SFP+ modules and direct attach cables that
-have received some testing. Not all modules are applicable to all devices.
-
-Supplier Type Part Numbers
-
-Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL
-Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ
-Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL
-
-Finisar DUAL RATE 1G/10G SFP+ SR (No Bail) FTLX8571D3QCV-IT
-Avago DUAL RATE 1G/10G SFP+ SR (No Bail) AFBR-703SDZ-IN1
-Finisar DUAL RATE 1G/10G SFP+ LR (No Bail) FTLX1471D3QCV-IT
-Avago DUAL RATE 1G/10G SFP+ LR (No Bail) AFCT-701SDZ-IN1
-Finistar 1000BASE-T SFP FCLF8522P2BTL
-Avago 1000BASE-T SFP ABCU-5710RZ
-
-82599-based adapters support all passive and active limiting direct attach
-cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
-
-Laser turns off for SFP+ when device is down
--------------------------------------------
-"ip link set down" turns off the laser for 82599-based SFP+ fiber adapters.
-"ip link set up" turns on the laser.
-
-
-82598-BASED ADAPTERS
-
-NOTES for 82598-Based Adapters:
-- Intel(R) Network Adapters that support removable optical modules only support
- their original module type (i.e., the Intel(R) 10 Gigabit SR Dual Port
- Express Module only supports SR optical modules). If you plug in a different
- type of module, the driver will not load.
-- Hot Swapping/hot plugging optical modules is not supported.
-- Only single speed, 10 gigabit modules are supported.
-- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module
- types are not supported. Please see your system documentation for details.
-
-The following is a list of 3rd party SFP+ modules and direct attach cables that
-have received some testing. Not all modules are applicable to all devices.
-
-Supplier Type Part Numbers
-
-Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL
-Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ
-Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL
-
-82598-based adapters support all passive direct attach cables that comply
-with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach
-cables are not supported.
-
-
-Flow Control
-------------
-Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
-receiving and transmitting pause frames for ixgbe. When TX is enabled, PAUSE
-frames are generated when the receive packet buffer crosses a predefined
-threshold. When rx is enabled, the transmit unit will halt for the time delay
-specified when a PAUSE frame is received.
-
-Flow Control is enabled by default. If you want to disable a flow control
-capable link partner, use ethtool:
-
- ethtool -A eth? autoneg off RX off TX off
-
-NOTE: For 82598 backplane cards entering 1 gig mode, flow control default
-behavior is changed to off. Flow control in 1 gig mode on these devices can
-lead to Tx hangs.
-
-Intel(R) Ethernet Flow Director
--------------------------------
-Supports advanced filters that direct receive packets by their flows to
-different queues. Enables tight control on routing a flow in the platform.
-Matches flows and CPU cores for flow affinity. Supports multiple parameters
-for flexible flow classification and load balancing.
-
-Flow director is enabled only if the kernel is multiple TX queue capable.
-
-An included script (set_irq_affinity.sh) automates setting the IRQ to CPU
-affinity.
-
-You can verify that the driver is using Flow Director by looking at the counter
-in ethtool: fdir_miss and fdir_match.
-
-Other ethtool Commands:
-To enable Flow Director
- ethtool -K ethX ntuple on
-To add a filter
- Use -U switch. e.g., ethtool -U ethX flow-type tcp4 src-ip 10.0.128.23
- action 1
-To see the list of filters currently present:
- ethtool -u ethX
-
-Perfect Filter: Perfect filter is an interface to load the filter table that
-funnels all flow into queue_0 unless an alternative queue is specified using
-"action". In that case, any flow that matches the filter criteria will be
-directed to the appropriate queue.
-
-If the queue is defined as -1, filter will drop matching packets.
-
-To account for filter matches and misses, there are two stats in ethtool:
-fdir_match and fdir_miss. In addition, rx_queue_N_packets shows the number of
-packets processed by the Nth queue.
-
-NOTE: Receive Packet Steering (RPS) and Receive Flow Steering (RFS) are not
-compatible with Flow Director. IF Flow Director is enabled, these will be
-disabled.
-
-The following three parameters impact Flow Director.
-
-FdirMode
---------
-Valid Range: 0-2 (0=off, 1=ATR, 2=Perfect filter mode)
-Default Value: 1
-
- Flow Director filtering modes.
-
-FdirPballoc
------------
-Valid Range: 0-2 (0=64k, 1=128k, 2=256k)
-Default Value: 0
-
- Flow Director allocated packet buffer size.
-
-AtrSampleRate
---------------
-Valid Range: 1-100
-Default Value: 20
-
- Software ATR Tx packet sample rate. For example, when set to 20, every 20th
- packet, looks to see if the packet will create a new flow.
-
-Node
-----
-Valid Range: 0-n
-Default Value: 1 (off)
-
- 0 - n: where n is the number of NUMA nodes (i.e. 0 - 3) currently online in
- your system
- 1: turns this option off
-
- The Node parameter will allow you to pick which NUMA node you want to have
- the adapter allocate memory on.
-
-max_vfs
--------
-Valid Range: 1-63
-Default Value: 0
-
- If the value is greater than 0 it will also force the VMDq parameter to be 1
- or more.
-
- This parameter adds support for SR-IOV. It causes the driver to spawn up to
- max_vfs worth of virtual function.
-
-
-Additional Configurations
-=========================
-
- Jumbo Frames
- ------------
- The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
- enabled by changing the MTU to a value larger than the default of 1500.
- The maximum value for the MTU is 16110. Use the ip command to
- increase the MTU size. For example:
-
- ip link set dev ethx mtu 9000
-
- The maximum MTU setting for Jumbo Frames is 9710. This value coincides
- with the maximum Jumbo Frames size of 9728.
-
- Generic Receive Offload, aka GRO
- --------------------------------
- The driver supports the in-kernel software implementation of GRO. GRO has
- shown that by coalescing Rx traffic into larger chunks of data, CPU
- utilization can be significantly reduced when under large Rx load. GRO is an
- evolution of the previously-used LRO interface. GRO is able to coalesce
- other protocols besides TCP. It's also safe to use with configurations that
- are problematic for LRO, namely bridging and iSCSI.
-
- Data Center Bridging, aka DCB
- -----------------------------
- DCB is a configuration Quality of Service implementation in hardware.
- It uses the VLAN priority tag (802.1p) to filter traffic. That means
- that there are 8 different priorities that traffic can be filtered into.
- It also enables priority flow control which can limit or eliminate the
- number of dropped packets during network stress. Bandwidth can be
- allocated to each of these priorities, which is enforced at the hardware
- level.
-
- To enable DCB support in ixgbe, you must enable the DCB netlink layer to
- allow the userspace tools (see below) to communicate with the driver.
- This can be found in the kernel configuration here:
-
- -> Networking support
- -> Networking options
- -> Data Center Bridging support
-
- Once this is selected, DCB support must be selected for ixgbe. This can
- be found here:
-
- -> Device Drivers
- -> Network device support (NETDEVICES [=y])
- -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
- -> Intel(R) 10GbE PCI Express adapters support
- -> Data Center Bridging (DCB) Support
-
- After these options are selected, you must rebuild your kernel and your
- modules.
-
- In order to use DCB, userspace tools must be downloaded and installed.
- The dcbd tools can be found at:
-
- http://e1000.sf.net
-
- Ethtool
- -------
- The driver utilizes the ethtool interface for driver configuration and
- diagnostics, as well as displaying statistical information. The latest
- ethtool version is required for this functionality.
-
- The latest release of ethtool can be found from
- https://www.kernel.org/pub/software/network/ethtool/
-
- FCoE
- ----
- This release of the ixgbe driver contains new code to enable users to use
- Fiber Channel over Ethernet (FCoE) and Data Center Bridging (DCB)
- functionality that is supported by the 82598-based hardware. This code has
- no default effect on the regular driver operation, and configuring DCB and
- FCoE is outside the scope of this driver README. Refer to
- http://www.open-fcoe.org/ for FCoE project information and contact
- e1000-eedc@lists.sourceforge.net for DCB information.
-
- MAC and VLAN anti-spoofing feature
- ----------------------------------
- When a malicious driver attempts to send a spoofed packet, it is dropped by
- the hardware and not transmitted. An interrupt is sent to the PF driver
- notifying it of the spoof attempt.
-
- When a spoofed packet is detected the PF driver will send the following
- message to the system log (displayed by the "dmesg" command):
-
- Spoof event(s) detected on VF (n)
-
- Where n=the VF that attempted to do the spoofing.
-
-
-Performance Tuning
-==================
-
-An excellent article on performance tuning can be found at:
-
-http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
-
-
-Known Issues
-============
-
- Enabling SR-IOV in a 32-bit or 64-bit Microsoft* Windows* Server 2008/R2
- Guest OS using Intel (R) 82576-based GbE or Intel (R) 82599-based 10GbE
- controller under KVM
- ------------------------------------------------------------------------
- KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
- includes traditional PCIe devices, as well as SR-IOV-capable devices using
- Intel 82576-based and 82599-based controllers.
-
- While direct assignment of a PCIe device or an SR-IOV Virtual Function (VF)
- to a Linux-based VM running 2.6.32 or later kernel works fine, there is a
- known issue with Microsoft Windows Server 2008 VM that results in a "yellow
- bang" error. This problem is within the KVM VMM itself, not the Intel driver,
- or the SR-IOV logic of the VMM, but rather that KVM emulates an older CPU
- model for the guests, and this older CPU model does not support MSI-X
- interrupts, which is a requirement for Intel SR-IOV.
-
- If you wish to use the Intel 82576 or 82599-based controllers in SR-IOV mode
- with KVM and a Microsoft Windows Server 2008 guest try the following
- workaround. The workaround is to tell KVM to emulate a different model of CPU
- when using qemu to create the KVM guest:
-
- "-cpu qemu64,model=13"
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://e1000.sourceforge.net
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgbevf.rst b/Documentation/networking/ixgbevf.rst
new file mode 100644
index 000000000000..56cde6366c2f
--- /dev/null
+++ b/Documentation/networking/ixgbevf.rst
@@ -0,0 +1,66 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet
+=============================================================
+
+Intel 10 Gigabit Virtual Function Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Known Issues
+- Support
+
+This driver supports 82599, X540, X550, and X552-based virtual function devices
+that can only be activated on kernels that support SR-IOV.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller 82598
+ * Intel(R) Ethernet Controller 82599
+ * Intel(R) Ethernet Controller X520
+ * Intel(R) Ethernet Controller X540
+ * Intel(R) Ethernet Controller x550
+ * Intel(R) Ethernet Controller X552
+ * Intel(R) Ethernet Controller X553
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+Known Issues/Troubleshooting
+============================
+
+SR-IOV requires the correct platform and OS support.
+
+The guest OS loading this driver must support MSI-X interrupts.
+
+This driver is only supported as a loadable module at this time. Intel is not
+supplying patches against the kernel source to allow for static linking of the
+drivers.
+
+VLANs: There is a limit of a total of 64 shared VLANs to 1 or more VFs.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ixgbevf.txt b/Documentation/networking/ixgbevf.txt
deleted file mode 100644
index 53d8d2a5a6a3..000000000000
--- a/Documentation/networking/ixgbevf.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
-
-Intel Gigabit Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Known Issues/Troubleshooting
-- Support
-
-This file describes the ixgbevf Linux* Base Driver for Intel Network
-Connection.
-
-The ixgbevf driver supports 82599-based virtual function devices that can only
-be activated on kernels with CONFIG_PCI_IOV enabled.
-
-The ixgbevf driver supports virtual functions generated by the ixgbe driver
-with a max_vfs value of 1 or greater.
-
-The guest OS loading the ixgbevf driver must support MSI-X interrupts.
-
-VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
-
-Identifying Your Adapter
-========================
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
- http://support.intel.com/support/go/network/adapter/idguide.htm
-
-Known Issues/Troubleshooting
-============================
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
- http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
- http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0
deleted file mode 100644
index f9158e831e79..000000000000
--- a/LICENSES/other/CC-BY-SA-4.0
+++ /dev/null
@@ -1,397 +0,0 @@
-Valid-License-Identifier: CC-BY-SA-4.0
-SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0
-Usage-Guide:
- To use the Creative Commons Attribution Share Alike 4.0 International
- license put the following SPDX tag/value pair into a comment according to
- the placement guidelines in the licensing rules documentation:
- SPDX-License-Identifier: CC-BY-SA-4.0
-License-Text:
-
-Creative Commons Attribution-ShareAlike 4.0 International
-
-Creative Commons Corporation ("Creative Commons") is not a law firm and
-does not provide legal services or legal advice. Distribution of Creative
-Commons public licenses does not create a lawyer-client or other
-relationship. Creative Commons makes its licenses and related information
-available on an "as-is" basis. Creative Commons gives no warranties
-regarding its licenses, any material licensed under their terms and
-conditions, or any related information. Creative Commons disclaims all
-liability for damages resulting from their use to the fullest extent
-possible.
-
-Using Creative Commons Public Licenses
-
-Creative Commons public licenses provide a standard set of terms and
-conditions that creators and other rights holders may use to share original
-works of authorship and other material subject to copyright and certain
-other rights specified in the public license below. The following
-considerations are for informational purposes only, are not exhaustive, and
-do not form part of our licenses.
-
-Considerations for licensors: Our public licenses are intended for use by
-those authorized to give the public permission to use material in ways
-otherwise restricted by copyright and certain other rights. Our licenses
-are irrevocable. Licensors should read and understand the terms and
-conditions of the license they choose before applying it. Licensors should
-also secure all rights necessary before applying our licenses so that the
-public can reuse the material as expected. Licensors should clearly mark
-any material not subject to the license. This includes other CC-licensed
-material, or material used under an exception or limitation to
-copyright. More considerations for licensors :
-wiki.creativecommons.org/Considerations_for_licensors
-
-Considerations for the public: By using one of our public licenses, a
-licensor grants the public permission to use the licensed material under
-specified terms and conditions. If the licensor's permission is not
-necessary for any reason - for example, because of any applicable exception
-or limitation to copyright - then that use is not regulated by the
-license. Our licenses grant only permissions under copyright and certain
-other rights that a licensor has authority to grant. Use of the licensed
-material may still be restricted for other reasons, including because
-others have copyright or other rights in the material. A licensor may make
-special requests, such as asking that all changes be marked or described.
-
-Although not required by our licenses, you are encouraged to respect those
-requests where reasonable. More considerations for the public :
-wiki.creativecommons.org/Considerations_for_licensees
-
-Creative Commons Attribution-ShareAlike 4.0 International Public License
-
-By exercising the Licensed Rights (defined below), You accept and agree to
-be bound by the terms and conditions of this Creative Commons
-Attribution-ShareAlike 4.0 International Public License ("Public
-License"). To the extent this Public License may be interpreted as a
-contract, You are granted the Licensed Rights in consideration of Your
-acceptance of these terms and conditions, and the Licensor grants You such
-rights in consideration of benefits the Licensor receives from making the
-Licensed Material available under these terms and conditions.
-
-Section 1 - Definitions.
-
- a. Adapted Material means material subject to Copyright and Similar
- Rights that is derived from or based upon the Licensed Material and
- in which the Licensed Material is translated, altered, arranged,
- transformed, or otherwise modified in a manner requiring permission
- under the Copyright and Similar Rights held by the Licensor. For
- purposes of this Public License, where the Licensed Material is a
- musical work, performance, or sound recording, Adapted Material is
- always produced where the Licensed Material is synched in timed
- relation with a moving image.
-
- b. Adapter's License means the license You apply to Your Copyright and
- Similar Rights in Your contributions to Adapted Material in
- accordance with the terms and conditions of this Public License.
-
- c. BY-SA Compatible License means a license listed at
- creativecommons.org/compatiblelicenses, approved by Creative Commons
- as essentially the equivalent of this Public License.
-
- d. Copyright and Similar Rights means copyright and/or similar rights
- closely related to copyright including, without limitation,
- performance, broadcast, sound recording, and Sui Generis Database
- Rights, without regard to how the rights are labeled or
- categorized. For purposes of this Public License, the rights
- specified in Section 2(b)(1)-(2) are not Copyright and Similar
- Rights.
-
- e. Effective Technological Measures means those measures that, in the
- absence of proper authority, may not be circumvented under laws
- fulfilling obligations under Article 11 of the WIPO Copyright Treaty
- adopted on December 20, 1996, and/or similar international
- agreements.
-
- f. Exceptions and Limitations means fair use, fair dealing, and/or any
- other exception or limitation to Copyright and Similar Rights that
- applies to Your use of the Licensed Material.
-
- g. License Elements means the license attributes listed in the name of
- a Creative Commons Public License. The License Elements of this
- Public License are Attribution and ShareAlike.
-
- h. Licensed Material means the artistic or literary work, database, or
- other material to which the Licensor applied this Public License.
-
- i. Licensed Rights means the rights granted to You subject to the terms
- and conditions of this Public License, which are limited to all
- Copyright and Similar Rights that apply to Your use of the Licensed
- Material and that the Licensor has authority to license.
-
- j. Licensor means the individual(s) or entity(ies) granting rights
- under this Public License.
-
- k. Share means to provide material to the public by any means or
- process that requires permission under the Licensed Rights, such as
- reproduction, public display, public performance, distribution,
- dissemination, communication, or importation, and to make material
- available to the public including in ways that members of the public
- may access the material from a place and at a time individually
- chosen by them.
-
- l. Sui Generis Database Rights means rights other than copyright
- resulting from Directive 96/9/EC of the European Parliament and of
- the Council of 11 March 1996 on the legal protection of databases,
- as amended and/or succeeded, as well as other essentially equivalent
- rights anywhere in the world. m. You means the individual or entity
- exercising the Licensed Rights under this Public License. Your has a
- corresponding meaning.
-
-Section 2 - Scope.
-
- a. License grant.
-
- 1. Subject to the terms and conditions of this Public License, the
- Licensor hereby grants You a worldwide, royalty-free,
- non-sublicensable, non-exclusive, irrevocable license to
- exercise the Licensed Rights in the Licensed Material to:
-
- A. reproduce and Share the Licensed Material, in whole or in part; and
-
- B. produce, reproduce, and Share Adapted Material.
-
- 2. Exceptions and Limitations. For the avoidance of doubt, where
- Exceptions and Limitations apply to Your use, this Public
- License does not apply, and You do not need to comply with its
- terms and conditions.
-
- 3. Term. The term of this Public License is specified in Section 6(a).
-
- 4. Media and formats; technical modifications allowed. The Licensor
- authorizes You to exercise the Licensed Rights in all media and
- formats whether now known or hereafter created, and to make
- technical modifications necessary to do so. The Licensor waives
- and/or agrees not to assert any right or authority to forbid You
- from making technical modifications necessary to exercise the
- Licensed Rights, including technical modifications necessary to
- circumvent Effective Technological Measures. For purposes of
- this Public License, simply making modifications authorized by
- this Section 2(a)(4) never produces Adapted Material.
-
- 5. Downstream recipients.
-
- A. Offer from the Licensor - Licensed Material. Every recipient
- of the Licensed Material automatically receives an offer
- from the Licensor to exercise the Licensed Rights under the
- terms and conditions of this Public License.
-
- B. Additional offer from the Licensor - Adapted Material. Every
- recipient of Adapted Material from You automatically
- receives an offer from the Licensor to exercise the Licensed
- Rights in the Adapted Material under the conditions of the
- Adapter's License You apply.
-
- C. No downstream restrictions. You may not offer or impose any
- additional or different terms or conditions on, or apply any
- Effective Technological Measures to, the Licensed Material
- if doing so restricts exercise of the Licensed Rights by any
- recipient of the Licensed Material.
-
- 6. No endorsement. Nothing in this Public License constitutes or
- may be construed as permission to assert or imply that You are,
- or that Your use of the Licensed Material is, connected with, or
- sponsored, endorsed, or granted official status by, the Licensor
- or others designated to receive attribution as provided in
- Section 3(a)(1)(A)(i).
-
- b. Other rights.
-
- 1. Moral rights, such as the right of integrity, are not licensed
- under this Public License, nor are publicity, privacy, and/or
- other similar personality rights; however, to the extent
- possible, the Licensor waives and/or agrees not to assert any
- such rights held by the Licensor to the limited extent necessary
- to allow You to exercise the Licensed Rights, but not otherwise.
-
- 2. Patent and trademark rights are not licensed under this Public
- License.
-
- 3. To the extent possible, the Licensor waives any right to collect
- royalties from You for the exercise of the Licensed Rights,
- whether directly or through a collecting society under any
- voluntary or waivable statutory or compulsory licensing
- scheme. In all other cases the Licensor expressly reserves any
- right to collect such royalties.
-
-Section 3 - License Conditions.
-
-Your exercise of the Licensed Rights is expressly made subject to the
-following conditions.
-
- a. Attribution.
-
- 1. If You Share the Licensed Material (including in modified form),
- You must:
-
- A. retain the following if it is supplied by the Licensor with
- the Licensed Material:
-
- i. identification of the creator(s) of the Licensed
- Material and any others designated to receive
- attribution, in any reasonable manner requested by the
- Licensor (including by pseudonym if designated);
-
- ii. a copyright notice;
-
- iii. a notice that refers to this Public License;
-
- iv. a notice that refers to the disclaimer of warranties;
-
- v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
-
- B. indicate if You modified the Licensed Material and retain an
- indication of any previous modifications; and
-
- C. indicate the Licensed Material is licensed under this Public
- License, and include the text of, or the URI or hyperlink to,
- this Public License.
-
- 2. You may satisfy the conditions in Section 3(a)(1) in any
- reasonable manner based on the medium, means, and context in
- which You Share the Licensed Material. For example, it may be
- reasonable to satisfy the conditions by providing a URI or
- hyperlink to a resource that includes the required information.
-
- 3. If requested by the Licensor, You must remove any of the
- information required by Section 3(a)(1)(A) to the extent
- reasonably practicable. b. ShareAlike.In addition to the
- conditions in Section 3(a), if You Share Adapted Material You
- produce, the following conditions also apply.
-
- 1. The Adapter's License You apply must be a Creative Commons
- license with the same License Elements, this version or
- later, or a BY-SA Compatible License.
-
- 2. You must include the text of, or the URI or hyperlink to, the
- Adapter's License You apply. You may satisfy this condition
- in any reasonable manner based on the medium, means, and
- context in which You Share Adapted Material.
-
- 3. You may not offer or impose any additional or different terms
- or conditions on, or apply any Effective Technological
- Measures to, Adapted Material that restrict exercise of the
- rights granted under the Adapter's License You apply.
-
-Section 4 - Sui Generis Database Rights.
-
-Where the Licensed Rights include Sui Generis Database Rights that apply to
-Your use of the Licensed Material:
-
- a. for the avoidance of doubt, Section 2(a)(1) grants You the right to
- extract, reuse, reproduce, and Share all or a substantial portion of
- the contents of the database;
-
- b. if You include all or a substantial portion of the database contents
- in a database in which You have Sui Generis Database Rights, then
- the database in which You have Sui Generis Database Rights (but not
- its individual contents) is Adapted Material, including for purposes
- of Section 3(b); and
-
- c. You must comply with the conditions in Section 3(a) if You Share all
- or a substantial portion of the contents of the database.
-
- For the avoidance of doubt, this Section 4 supplements and does not
- replace Your obligations under this Public License where the Licensed
- Rights include other Copyright and Similar Rights.
-
-Section 5 - Disclaimer of Warranties and Limitation of Liability.
-
- a. Unless otherwise separately undertaken by the Licensor, to the
- extent possible, the Licensor offers the Licensed Material as-is and
- as-available, and makes no representations or warranties of any kind
- concerning the Licensed Material, whether express, implied,
- statutory, or other. This includes, without limitation, warranties
- of title, merchantability, fitness for a particular purpose,
- non-infringement, absence of latent or other defects, accuracy, or
- the presence or absence of errors, whether or not known or
- discoverable. Where disclaimers of warranties are not allowed in
- full or in part, this disclaimer may not apply to You.
-
- b. To the extent possible, in no event will the Licensor be liable to
- You on any legal theory (including, without limitation, negligence)
- or otherwise for any direct, special, indirect, incidental,
- consequential, punitive, exemplary, or other losses, costs,
- expenses, or damages arising out of this Public License or use of
- the Licensed Material, even if the Licensor has been advised of the
- possibility of such losses, costs, expenses, or damages. Where a
- limitation of liability is not allowed in full or in part, this
- limitation may not apply to You.
-
- c. The disclaimer of warranties and limitation of liability provided
- above shall be interpreted in a manner that, to the extent possible,
- most closely approximates an absolute disclaimer and waiver of all
- liability.
-
-Section 6 - Term and Termination.
-
- a. This Public License applies for the term of the Copyright and
- Similar Rights licensed here. However, if You fail to comply with
- this Public License, then Your rights under this Public License
- terminate automatically.
-
- b. Where Your right to use the Licensed Material has terminated under
- Section 6(a), it reinstates:
-
- 1. automatically as of the date the violation is cured, provided it
- is cured within 30 days of Your discovery of the violation; or
-
- 2. upon express reinstatement by the Licensor.
-
- c. For the avoidance of doubt, this Section 6(b) does not affect any
- right the Licensor may have to seek remedies for Your violations of
- this Public License.
-
- d. For the avoidance of doubt, the Licensor may also offer the Licensed
- Material under separate terms or conditions or stop distributing the
- Licensed Material at any time; however, doing so will not terminate
- this Public License.
-
- e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
-
-Section 7 - Other Terms and Conditions.
-
- a. The Licensor shall not be bound by any additional or different terms
- or conditions communicated by You unless expressly agreed.
-
- b. Any arrangements, understandings, or agreements regarding the
- Licensed Material not stated herein are separate from and
- independent of the terms and conditions of this Public License.
-
-Section 8 - Interpretation.
-
- a. For the avoidance of doubt, this Public License does not, and shall
- not be interpreted to, reduce, limit, restrict, or impose conditions
- on any use of the Licensed Material that could lawfully be made
- without permission under this Public License.
-
- b. To the extent possible, if any provision of this Public License is
- deemed unenforceable, it shall be automatically reformed to the
- minimum extent necessary to make it enforceable. If the provision
- cannot be reformed, it shall be severed from this Public License
- without affecting the enforceability of the remaining terms and
- conditions.
-
- c. No term or condition of this Public License will be waived and no
- failure to comply consented to unless expressly agreed to by the
- Licensor.
-
- d. Nothing in this Public License constitutes or may be interpreted as
- a limitation upon, or waiver of, any privileges and immunities that
- apply to the Licensor or You, including from the legal processes of
- any jurisdiction or authority.
-
-Creative Commons is not a party to its public licenses. Notwithstanding,
-Creative Commons may elect to apply one of its public licenses to material
-it publishes and in those instances will be considered the "Licensor." The
-text of the Creative Commons public licenses is dedicated to the public
-domain under the CC0 Public Domain Dedication. Except for the limited
-purpose of indicating that material is shared under a Creative Commons
-public license or as otherwise permitted by the Creative Commons policies
-published at creativecommons.org/policies, Creative Commons does not
-authorize the use of the trademark "Creative Commons" or any other
-trademark or logo of Creative Commons without its prior written consent
-including, without limitation, in connection with any unauthorized
-modifications to any of its public licenses or any other arrangements,
-understandings, or agreements concerning use of licensed material. For the
-avoidance of doubt, this paragraph does not form part of the public
-licenses.
-
-Creative Commons may be contacted at creativecommons.org.
diff --git a/MAINTAINERS b/MAINTAINERS
index 7f1399ac028e..e0eb8ec08789 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7351,15 +7351,16 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git
S: Supported
F: Documentation/networking/e100.rst
F: Documentation/networking/e1000.rst
-F: Documentation/networking/e1000e.txt
-F: Documentation/networking/igb.txt
-F: Documentation/networking/igbvf.txt
-F: Documentation/networking/ixgb.txt
-F: Documentation/networking/ixgbe.txt
-F: Documentation/networking/ixgbevf.txt
-F: Documentation/networking/i40e.txt
-F: Documentation/networking/iavf.txt
-F: Documentation/networking/ice.txt
+F: Documentation/networking/e1000e.rst
+F: Documentation/networking/fm10k.rst
+F: Documentation/networking/igb.rst
+F: Documentation/networking/igbvf.rst
+F: Documentation/networking/ixgb.rst
+F: Documentation/networking/ixgbe.rst
+F: Documentation/networking/ixgbevf.rst
+F: Documentation/networking/i40e.rst
+F: Documentation/networking/iavf.rst
+F: Documentation/networking/ice.rst
F: drivers/net/ethernet/intel/
F: drivers/net/ethernet/intel/*/
F: include/linux/avf/virtchnl.h
@@ -10160,7 +10161,6 @@ L: netdev@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
S: Maintained
-F: net/core/flow.c
F: net/xfrm/
F: net/key/
F: net/ipv4/xfrm*
@@ -13100,7 +13100,7 @@ SELINUX SECURITY MODULE
M: Paul Moore <paul@paul-moore.com>
M: Stephen Smalley <sds@tycho.nsa.gov>
M: Eric Paris <eparis@parisplace.org>
-L: selinux@tycho.nsa.gov (moderated for non-subscribers)
+L: selinux@vger.kernel.org
W: https://selinuxproject.org
W: https://github.com/SELinuxProject
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
@@ -15745,7 +15745,7 @@ F: include/linux/regulator/
VRF
M: David Ahern <dsa@cumulusnetworks.com>
-M: Shrijeet Mukherjee <shm@cumulusnetworks.com>
+M: Shrijeet Mukherjee <shrijeet@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/vrf.c
diff --git a/Makefile b/Makefile
index e8b599b4dcde..bf3786e4ffec 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
NAME = Merciless Moray
# *DOCUMENTATION*
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 450c7a4fbc8a..cb094e55dc5f 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = {
/* ICC_SGI1R */
{ CRm64(12), Op1( 0), is64, access_gic_sgi},
- /* ICC_ASGI1R */
- { CRm64(12), Op1( 1), is64, access_gic_sgi},
- /* ICC_SGI0R */
- { CRm64(12), Op1( 2), is64, access_gic_sgi},
/* VBAR: swapped by interrupt.S. */
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
NULL, reset_val, c12_VBAR, 0x00000000 },
+ /* ICC_ASGI1R */
+ { CRm64(12), Op1( 1), is64, access_gic_sgi},
+ /* ICC_SGI0R */
+ { CRm64(12), Op1( 2), is64, access_gic_sgi},
/* ICC_SRE */
{ CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre },
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 8e38d5267f22..e213f8e867f6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}
+static int armv8pmu_filter_match(struct perf_event *event)
+{
+ unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
+ return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
+}
+
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->stop = armv8pmu_stop,
cpu_pmu->reset = armv8pmu_reset,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
+ cpu_pmu->filter_match = armv8pmu_filter_match;
return 0;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 5b4fac434c84..b3354ff94e79 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -64,6 +64,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/mmu_context.h>
+static int num_standard_resources;
+static struct resource *standard_resources;
+
phys_addr_t __fdt_pointer __initdata;
/*
@@ -206,14 +209,19 @@ static void __init request_standard_resources(void)
{
struct memblock_region *region;
struct resource *res;
+ unsigned long i = 0;
kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
+ num_standard_resources = memblock.memory.cnt;
+ standard_resources = alloc_bootmem_low(num_standard_resources *
+ sizeof(*standard_resources));
+
for_each_memblock(memory, region) {
- res = alloc_bootmem_low(sizeof(*res));
+ res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name = "reserved";
res->flags = IORESOURCE_MEM;
@@ -243,36 +251,26 @@ static void __init request_standard_resources(void)
static int __init reserve_memblock_reserved_regions(void)
{
- phys_addr_t start, end, roundup_end = 0;
- struct resource *mem, *res;
- u64 i;
-
- for_each_reserved_mem_region(i, &start, &end) {
- if (end <= roundup_end)
- continue; /* done already */
-
- start = __pfn_to_phys(PFN_DOWN(start));
- end = __pfn_to_phys(PFN_UP(end)) - 1;
- roundup_end = end;
-
- res = kzalloc(sizeof(*res), GFP_ATOMIC);
- if (WARN_ON(!res))
- return -ENOMEM;
- res->start = start;
- res->end = end;
- res->name = "reserved";
- res->flags = IORESOURCE_MEM;
-
- mem = request_resource_conflict(&iomem_resource, res);
- /*
- * We expected memblock_reserve() regions to conflict with
- * memory created by request_standard_resources().
- */
- if (WARN_ON_ONCE(!mem))
+ u64 i, j;
+
+ for (i = 0; i < num_standard_resources; ++i) {
+ struct resource *mem = &standard_resources[i];
+ phys_addr_t r_start, r_end, mem_size = resource_size(mem);
+
+ if (!memblock_is_region_reserved(mem->start, mem_size))
continue;
- kfree(res);
- reserve_region_with_split(mem, start, end, "reserved");
+ for_each_reserved_mem_region(j, &r_start, &r_end) {
+ resource_size_t start, end;
+
+ start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
+ end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end);
+
+ if (start > mem->end || end < mem->start)
+ continue;
+
+ reserve_region_with_split(mem, start, end, "reserved");
+ }
}
return 0;
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index f329b466e68f..2d14f17838d2 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info,
r.gr[30] = get_parisc_stackpointer();
regs = &r;
}
- unwind_frame_init(info, task, &r);
+ unwind_frame_init(info, task, regs);
} else {
unwind_frame_init_from_blocked_task(info, task);
}
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2fdc865ca374..2a2486526d1f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -114,7 +114,7 @@
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
/*
* user access blocked by key
*/
@@ -132,7 +132,7 @@
*/
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
index 666d6b5c0440..9c3fc03abe9a 100644
--- a/arch/sparc/include/asm/cpudata_64.h
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -28,7 +28,7 @@ typedef struct {
unsigned short sock_id; /* physical package */
unsigned short core_id;
unsigned short max_cache_id; /* groupings of highest shared cache */
- unsigned short proc_id; /* strand (aka HW thread) id */
+ signed short proc_id; /* strand (aka HW thread) id */
} cpuinfo_sparc;
DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 09acf0ddec10..45b4bf1875e6 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -427,8 +427,9 @@
#define __NR_preadv2 358
#define __NR_pwritev2 359
#define __NR_statx 360
+#define __NR_io_pgetevents 361
-#define NR_syscalls 361
+#define NR_syscalls 362
/* Bitmask values returned from kern_features system call. */
#define KERN_FEATURE_MIXED_MODE_STACK 0x00000001
diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c
index cc42225c20f3..4e8f56c3793c 100644
--- a/arch/sparc/kernel/auxio_64.c
+++ b/arch/sparc/kernel/auxio_64.c
@@ -115,8 +115,8 @@ static int auxio_probe(struct platform_device *dev)
auxio_devtype = AUXIO_TYPE_SBUS;
size = 1;
} else {
- printk("auxio: Unknown parent bus type [%pOFn]\n",
- dp->parent);
+ printk("auxio: Unknown parent bus type [%s]\n",
+ dp->parent->name);
return -ENODEV;
}
auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio");
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index d3149baaa33c..67b3e6b3ce5d 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -24,6 +24,7 @@
#include <asm/cpudata.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include <linux/sched/clock.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
#include <asm/cacheflush.h>
@@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc)
sparc_perf_event_update(cp, &cp->hw,
cpuc->current_idx[i]);
cpuc->current_idx[i] = PIC_NO_INDEX;
+ if (cp->hw.state & PERF_HES_STOPPED)
+ cp->hw.state |= PERF_HES_ARCH;
}
}
}
@@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[0] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
+ if (hwc->state & PERF_HES_ARCH) {
cpuc->pcr[0] |= nop_for_index(idx);
- else
+ } else {
cpuc->pcr[0] |= event_encoding(enc, idx);
+ hwc->state = 0;
+ }
}
out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
@@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
cpuc->current_idx[i] = idx;
+ if (cp->hw.state & PERF_HES_ARCH)
+ continue;
+
sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
@@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags)
event->hw.state = 0;
sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+ perf_event_update_userpage(event);
}
static void sparc_pmu_stop(struct perf_event *event, int flags)
@@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
cpuc->events[n0] = event->hw.event_base;
cpuc->current_idx[n0] = PIC_NO_INDEX;
- event->hw.state = PERF_HES_UPTODATE;
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(ef_flags & PERF_EF_START))
- event->hw.state |= PERF_HES_STOPPED;
+ event->hw.state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
@@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
+ u64 finish_clock;
+ u64 start_clock;
int i;
if (!atomic_read(&active_events))
@@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
return NOTIFY_DONE;
}
+ start_clock = sched_clock();
+
regs = args->regs;
cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
sparc_pmu_stop(event, 0);
}
+ finish_clock = sched_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
return NOTIFY_STOP;
}
diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c
index d941875dd718..92627abce311 100644
--- a/arch/sparc/kernel/power.c
+++ b/arch/sparc/kernel/power.c
@@ -41,8 +41,8 @@ static int power_probe(struct platform_device *op)
power_reg = of_ioremap(res, 0, 0x4, "power");
- printk(KERN_INFO "%pOFn: Control reg at %llx\n",
- op->dev.of_node, res->start);
+ printk(KERN_INFO "%s: Control reg at %llx\n",
+ op->dev.of_node->name, res->start);
if (has_button_interrupt(irq, op->dev.of_node)) {
if (request_irq(irq,
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 17c87d29ff20..b51cbb9e87dc 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -68,8 +68,8 @@ static void __init sparc32_path_component(struct device_node *dp, char *tmp_buf)
return;
regs = rprop->value;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
regs->which_io, regs->phys_addr);
}
@@ -84,8 +84,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf)
return;
regs = prop->value;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
regs->which_io,
regs->phys_addr);
}
@@ -104,13 +104,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
devfn = (regs->phys_hi >> 8) & 0xff;
if (devfn & 0x07) {
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
devfn >> 3,
devfn & 0x07);
} else {
- sprintf(tmp_buf, "%pOFn@%x",
- dp,
+ sprintf(tmp_buf, "%s@%x",
+ dp->name,
devfn >> 3);
}
}
@@ -127,8 +127,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
regs->which_io, regs->phys_addr);
}
@@ -167,8 +167,8 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
return;
device = prop->value;
- sprintf(tmp_buf, "%pOFn:%d:%d@%x,%x",
- dp, *vendor, *device,
+ sprintf(tmp_buf, "%s:%d:%d@%x,%x",
+ dp->name, *vendor, *device,
*intr, reg0);
}
@@ -201,7 +201,7 @@ char * __init build_path_component(struct device_node *dp)
tmp_buf[0] = '\0';
__build_path_component(dp, tmp_buf);
if (tmp_buf[0] == '\0')
- snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp);
+ strcpy(tmp_buf, dp->name);
n = prom_early_alloc(strlen(tmp_buf) + 1);
strcpy(n, tmp_buf);
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index 6220411ce8fc..baeaeed64993 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -82,8 +82,8 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf)
regs = rprop->value;
if (!of_node_is_root(dp->parent)) {
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
(unsigned int) (regs->phys_addr >> 32UL),
(unsigned int) (regs->phys_addr & 0xffffffffUL));
return;
@@ -97,17 +97,17 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf)
const char *prefix = (type == 0) ? "m" : "i";
if (low_bits)
- sprintf(tmp_buf, "%pOFn@%s%x,%x",
- dp, prefix,
+ sprintf(tmp_buf, "%s@%s%x,%x",
+ dp->name, prefix,
high_bits, low_bits);
else
- sprintf(tmp_buf, "%pOFn@%s%x",
- dp,
+ sprintf(tmp_buf, "%s@%s%x",
+ dp->name,
prefix,
high_bits);
} else if (type == 12) {
- sprintf(tmp_buf, "%pOFn@%x",
- dp, high_bits);
+ sprintf(tmp_buf, "%s@%x",
+ dp->name, high_bits);
}
}
@@ -122,8 +122,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
if (!of_node_is_root(dp->parent)) {
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
(unsigned int) (regs->phys_addr >> 32UL),
(unsigned int) (regs->phys_addr & 0xffffffffUL));
return;
@@ -138,8 +138,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf)
if (tlb_type >= cheetah)
mask = 0x7fffff;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
*(u32 *)prop->value,
(unsigned int) (regs->phys_addr & mask));
}
@@ -156,8 +156,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf)
return;
regs = prop->value;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
regs->which_io,
regs->phys_addr);
}
@@ -176,13 +176,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
devfn = (regs->phys_hi >> 8) & 0xff;
if (devfn & 0x07) {
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
devfn >> 3,
devfn & 0x07);
} else {
- sprintf(tmp_buf, "%pOFn@%x",
- dp,
+ sprintf(tmp_buf, "%s@%x",
+ dp->name,
devfn >> 3);
}
}
@@ -203,8 +203,8 @@ static void __init upa_path_component(struct device_node *dp, char *tmp_buf)
if (!prop)
return;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
*(u32 *) prop->value,
(unsigned int) (regs->phys_addr & 0xffffffffUL));
}
@@ -221,7 +221,7 @@ static void __init vdev_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
- sprintf(tmp_buf, "%pOFn@%x", dp, *regs);
+ sprintf(tmp_buf, "%s@%x", dp->name, *regs);
}
/* "name@addrhi,addrlo" */
@@ -236,8 +236,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp,
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name,
(unsigned int) (regs->phys_addr >> 32UL),
(unsigned int) (regs->phys_addr & 0xffffffffUL));
}
@@ -257,8 +257,8 @@ static void __init i2c_path_component(struct device_node *dp, char *tmp_buf)
/* This actually isn't right... should look at the #address-cells
* property of the i2c bus node etc. etc.
*/
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp, regs[0], regs[1]);
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name, regs[0], regs[1]);
}
/* "name@reg0[,reg1]" */
@@ -274,11 +274,11 @@ static void __init usb_path_component(struct device_node *dp, char *tmp_buf)
regs = prop->value;
if (prop->length == sizeof(u32) || regs[1] == 1) {
- sprintf(tmp_buf, "%pOFn@%x",
- dp, regs[0]);
+ sprintf(tmp_buf, "%s@%x",
+ dp->name, regs[0]);
} else {
- sprintf(tmp_buf, "%pOFn@%x,%x",
- dp, regs[0], regs[1]);
+ sprintf(tmp_buf, "%s@%x,%x",
+ dp->name, regs[0], regs[1]);
}
}
@@ -295,11 +295,11 @@ static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf
regs = prop->value;
if (regs[2] || regs[3]) {
- sprintf(tmp_buf, "%pOFn@%08x%08x,%04x%08x",
- dp, regs[0], regs[1], regs[2], regs[3]);
+ sprintf(tmp_buf, "%s@%08x%08x,%04x%08x",
+ dp->name, regs[0], regs[1], regs[2], regs[3]);
} else {
- sprintf(tmp_buf, "%pOFn@%08x%08x",
- dp, regs[0], regs[1]);
+ sprintf(tmp_buf, "%s@%08x%08x",
+ dp->name, regs[0], regs[1]);
}
}
@@ -361,7 +361,7 @@ char * __init build_path_component(struct device_node *dp)
tmp_buf[0] = '\0';
__build_path_component(dp, tmp_buf);
if (tmp_buf[0] == '\0')
- snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp);
+ strcpy(tmp_buf, dp->name);
n = prom_early_alloc(strlen(tmp_buf) + 1);
strcpy(n, tmp_buf);
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index f6528884a2c8..4073e2b87dd0 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -84,8 +84,9 @@ __handle_signal:
ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
sethi %hi(0xf << 20), %l4
and %l1, %l4, %l4
+ andn %l1, %l4, %l1
ba,pt %xcc, __handle_preemption_continue
- andn %l1, %l4, %l1
+ srl %l4, 20, %l4
/* When returning from a NMI (%pil==15) interrupt we want to
* avoid running softirqs, doing IRQ tracing, preempting, etc.
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 12bee14b552c..621a363098ec 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -90,4 +90,4 @@ sys_call_table:
/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
-/*360*/ .long sys_statx
+/*360*/ .long sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 387ef993880a..bb68c805b891 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -91,7 +91,7 @@ sys_call_table32:
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
-/*360*/ .word sys_statx
+/*360*/ .word sys_statx, compat_sys_io_pgetevents
#endif /* CONFIG_COMPAT */
@@ -173,4 +173,4 @@ sys_call_table:
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
.word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
-/*360*/ .word sys_statx
+/*360*/ .word sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
index 3feb3d960ca5..75dca9aab737 100644
--- a/arch/sparc/vdso/vclock_gettime.c
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -33,9 +33,19 @@
#define TICK_PRIV_BIT (1ULL << 63)
#endif
+#ifdef CONFIG_SPARC64
#define SYSCALL_STRING \
"ta 0x6d;" \
- "sub %%g0, %%o0, %%o0;" \
+ "bcs,a 1f;" \
+ " sub %%g0, %%o0, %%o0;" \
+ "1:"
+#else
+#define SYSCALL_STRING \
+ "ta 0x10;" \
+ "bcs,a 1f;" \
+ " sub %%g0, %%o0, %%o0;" \
+ "1:"
+#endif
#define SYSCALL_CLOBBERS \
"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
index f51595f861b8..5eaff3c1aa0c 100644
--- a/arch/sparc/vdso/vma.c
+++ b/arch/sparc/vdso/vma.c
@@ -262,7 +262,9 @@ static __init int vdso_setup(char *s)
unsigned long val;
err = kstrtoul(s, 10, &val);
+ if (err)
+ return err;
vdso_enabled = val;
- return err;
+ return 0;
}
__setup("vdso=", vdso_setup);
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 28764dacf018..466f66c8a7f8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..fbbf1ba57ec6 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -389,6 +389,13 @@
* that register for the time this macro runs
*/
+ /*
+ * The high bits of the CS dword (__csh) are used for
+ * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
+ * hardware didn't do this for us.
+ */
+ andl $(0x0000ffff), PT_CS(%esp)
+
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -407,12 +414,6 @@
/* Load top of task-stack into %edi */
movl TSS_entry2task_stack(%edi), %edi
- /*
- * Clear unused upper bits of the dword containing the word-sized CS
- * slot in pt_regs in case hardware didn't clear it for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Special case - entry from kernel mode via entry stack */
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..f95dcb209fdf 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry)
xorl %ebx, %ebx
1:
+ /*
+ * Always stash CR3 in %r14. This value will be restored,
+ * verbatim, at exit. Needed if paranoid_entry interrupted
+ * another entry that already switched to the user CR3 value
+ * but has not yet returned to userspace.
+ *
+ * This is also why CS (stashed in the "iret frame" by the
+ * hardware at entry) can not be used: this may be a return
+ * to kernel code, but with a user CR3 value.
+ */
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
ret
@@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
@@ -1626,6 +1638,7 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
+ /* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
testl %ebx, %ebx /* swapgs needed? */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index a38bf5a1e37a..69dcdf195b61 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->initialized) {
+ if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e9202a0de8f0..1a19d11cfbbd 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -185,22 +185,22 @@ do { \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(1)",%0" \
+ asm volatile(op "b "__percpu_arg(1)",%0"\
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(1)",%0" \
+ asm volatile(op "w "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(1)",%0" \
+ asm volatile(op "l "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(1)",%0" \
+ asm volatile(op "q "__percpu_arg(1)",%0"\
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b64acb08a62b..106b7d0e2dae 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -124,7 +124,7 @@
*/
#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 23f1691670b6..61a949d84dfa 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Validate and sanitize the copied state.
*/
- struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 661583662430..71c0b01d93b1 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
int __init pci_swiotlb_detect_4gb(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-#ifdef CONFIG_X86_64
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
-#endif
/*
* If SME is active then swiotlb will be set to 1 so that bounce
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index be01328eb755..fddaefc51fb6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,7 +25,7 @@
#include <asm/time.h>
#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
#endif
unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b52bd2b6cdb4..6d5dc5dabfd7 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -58,7 +58,7 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-void cyc2ns_read_begin(struct cyc2ns_data *data)
+void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-void cyc2ns_read_end(void)
+void __always_inline cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
@@ -104,7 +104,7 @@ void cyc2ns_read_end(void)
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d96092b35936..61ccfb13899e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
static inline bool svm_sev_enabled(void)
{
- return max_sev_asid;
+ return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
}
static inline bool sev_guest(struct kvm *kvm)
{
+#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->active;
+#else
+ return false;
+#endif
}
static inline int sev_get_asid(struct kvm *kvm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 612fd17be635..e665aa7167cf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
goto out;
}
+ /*
+ * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the
+ * base of EPT PML4 table, strip off EPT configuration information.
+ */
ret = hyperv_flush_guest_mapping(
- to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
out:
spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index d1b9dd03da25..bbd44666f2b5 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -29,9 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
{
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
- unsigned int granularity;
unsigned int op;
- int alignment;
sector_t bs_mask;
if (!q)
@@ -54,38 +52,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if ((sector | nr_sects) & bs_mask)
return -EINVAL;
- /* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
- alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
while (nr_sects) {
- unsigned int req_sects;
- sector_t end_sect, tmp;
+ unsigned int req_sects = nr_sects;
+ sector_t end_sect;
- /*
- * Issue in chunks of the user defined max discard setting,
- * ensuring that bi_size doesn't overflow
- */
- req_sects = min_t(sector_t, nr_sects,
- q->limits.max_discard_sectors);
if (!req_sects)
goto fail;
if (req_sects > UINT_MAX >> 9)
req_sects = UINT_MAX >> 9;
- /*
- * If splitting a request, and the next starting sector would be
- * misaligned, stop the discard at the previous aligned sector.
- */
end_sect = sector + req_sects;
- tmp = end_sect;
- if (req_sects < nr_sects &&
- sector_div(tmp, granularity) != alignment) {
- end_sect = end_sect - alignment;
- sector_div(end_sect, granularity);
- end_sect = end_sect * granularity + alignment;
- req_sects = end_sect - sector;
- }
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8e20a0677dcf..8ac93fcbaa2e 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -310,6 +310,7 @@ static void scale_up(struct rq_wb *rwb)
rq_depth_scale_up(&rwb->rq_depth);
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
+ rwb_wake_all(rwb);
rwb_trace_step(rwb, "scale up");
}
@@ -318,7 +319,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle)
rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
- rwb_wake_all(rwb);
rwb_trace_step(rwb, "scale down");
}
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 6470e3c4c990..f8c703426c90 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -241,7 +241,8 @@ static void __iomem *eni_alloc_mem(struct eni_dev *eni_dev, unsigned long *size)
len = eni_dev->free_len;
if (*size < MID_MIN_BUF_SIZE) *size = MID_MIN_BUF_SIZE;
if (*size > MID_MAX_BUF_SIZE) return NULL;
- for (order = 0; (1 << order) < *size; order++);
+ for (order = 0; (1 << order) < *size; order++)
+ ;
DPRINTK("trying: %ld->%d\n",*size,order);
best_order = 65; /* we don't have more than 2^64 of anything ... */
index = 0; /* silence GCC */
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index e89146ddede6..d5c76b50d357 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -126,7 +126,7 @@ static unsigned long dummy[2] = {0,0};
#define zin_n(r) inl(zatm_dev->base+r*4)
#define zin(r) inl(zatm_dev->base+uPD98401_##r*4)
#define zout(v,r) outl(v,zatm_dev->base+uPD98401_##r*4)
-#define zwait while (zin(CMR) & uPD98401_BUSY)
+#define zwait() do {} while (zin(CMR) & uPD98401_BUSY)
/* RX0, RX1, TX0, TX1 */
static const int mbx_entries[NR_MBX] = { 1024,1024,1024,1024 };
@@ -140,7 +140,7 @@ static const int mbx_esize[NR_MBX] = { 16,16,4,4 }; /* entry size in bytes */
static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr)
{
- zwait;
+ zwait();
zout(value,CER);
zout(uPD98401_IND_ACC | uPD98401_IA_BALL |
(uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR);
@@ -149,10 +149,10 @@ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr)
static u32 zpeekl(struct zatm_dev *zatm_dev,u32 addr)
{
- zwait;
+ zwait();
zout(uPD98401_IND_ACC | uPD98401_IA_BALL | uPD98401_IA_RW |
(uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR);
- zwait;
+ zwait();
return zin(CER);
}
@@ -241,7 +241,7 @@ static void refill_pool(struct atm_dev *dev,int pool)
}
if (first) {
spin_lock_irqsave(&zatm_dev->lock, flags);
- zwait;
+ zwait();
zout(virt_to_bus(first),CER);
zout(uPD98401_ADD_BAT | (pool << uPD98401_POOL_SHIFT) | count,
CMR);
@@ -508,9 +508,9 @@ static int open_rx_first(struct atm_vcc *vcc)
}
if (zatm_vcc->pool < 0) return -EMSGSIZE;
spin_lock_irqsave(&zatm_dev->lock, flags);
- zwait;
+ zwait();
zout(uPD98401_OPEN_CHAN,CMR);
- zwait;
+ zwait();
DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER));
chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT;
spin_unlock_irqrestore(&zatm_dev->lock, flags);
@@ -571,21 +571,21 @@ static void close_rx(struct atm_vcc *vcc)
pos = vcc->vci >> 1;
shift = (1-(vcc->vci & 1)) << 4;
zpokel(zatm_dev,zpeekl(zatm_dev,pos) & ~(0xffff << shift),pos);
- zwait;
+ zwait();
zout(uPD98401_NOP,CMR);
- zwait;
+ zwait();
zout(uPD98401_NOP,CMR);
spin_unlock_irqrestore(&zatm_dev->lock, flags);
}
spin_lock_irqsave(&zatm_dev->lock, flags);
- zwait;
+ zwait();
zout(uPD98401_DEACT_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan <<
uPD98401_CHAN_ADDR_SHIFT),CMR);
- zwait;
+ zwait();
udelay(10); /* why oh why ... ? */
zout(uPD98401_CLOSE_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan <<
uPD98401_CHAN_ADDR_SHIFT),CMR);
- zwait;
+ zwait();
if (!(zin(CMR) & uPD98401_CHAN_ADDR))
printk(KERN_CRIT DEV_LABEL "(itf %d): can't close RX channel "
"%d\n",vcc->dev->number,zatm_vcc->rx_chan);
@@ -699,7 +699,7 @@ printk("NONONONOO!!!!\n");
skb_queue_tail(&zatm_vcc->tx_queue,skb);
DPRINTK("QRP=0x%08lx\n",zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+
uPD98401_TXVC_QRP));
- zwait;
+ zwait();
zout(uPD98401_TX_READY | (zatm_vcc->tx_chan <<
uPD98401_CHAN_ADDR_SHIFT),CMR);
spin_unlock_irqrestore(&zatm_dev->lock, flags);
@@ -891,12 +891,12 @@ static void close_tx(struct atm_vcc *vcc)
}
spin_lock_irqsave(&zatm_dev->lock, flags);
#if 0
- zwait;
+ zwait();
zout(uPD98401_DEACT_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR);
#endif
- zwait;
+ zwait();
zout(uPD98401_CLOSE_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR);
- zwait;
+ zwait();
if (!(zin(CMR) & uPD98401_CHAN_ADDR))
printk(KERN_CRIT DEV_LABEL "(itf %d): can't close TX channel "
"%d\n",vcc->dev->number,chan);
@@ -926,9 +926,9 @@ static int open_tx_first(struct atm_vcc *vcc)
zatm_vcc->tx_chan = 0;
if (vcc->qos.txtp.traffic_class == ATM_NONE) return 0;
spin_lock_irqsave(&zatm_dev->lock, flags);
- zwait;
+ zwait();
zout(uPD98401_OPEN_CHAN,CMR);
- zwait;
+ zwait();
DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER));
chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT;
spin_unlock_irqrestore(&zatm_dev->lock, flags);
@@ -1557,7 +1557,7 @@ static void zatm_phy_put(struct atm_dev *dev,unsigned char value,
struct zatm_dev *zatm_dev;
zatm_dev = ZATM_DEV(dev);
- zwait;
+ zwait();
zout(value,CER);
zout(uPD98401_IND_ACC | uPD98401_IA_B0 |
(uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR);
@@ -1569,10 +1569,10 @@ static unsigned char zatm_phy_get(struct atm_dev *dev,unsigned long addr)
struct zatm_dev *zatm_dev;
zatm_dev = ZATM_DEV(dev);
- zwait;
+ zwait();
zout(uPD98401_IND_ACC | uPD98401_IA_B0 | uPD98401_IA_RW |
(uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR);
- zwait;
+ zwait();
return zin(CER) & 0xff;
}
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5ca56bfae63c..f68e9baffad7 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -36,6 +36,10 @@ MODULE_VERSION(DRV_MODULE_VERSION);
#define VDC_TX_RING_SIZE 512
#define VDC_DEFAULT_BLK_SIZE 512
+#define MAX_XFER_BLKS (128 * 1024)
+#define MAX_XFER_SIZE (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE)
+#define MAX_RING_COOKIES ((MAX_XFER_BLKS / PAGE_SIZE) + 2)
+
#define WAITING_FOR_LINK_UP 0x01
#define WAITING_FOR_TX_SPACE 0x02
#define WAITING_FOR_GEN_CMD 0x04
@@ -450,7 +454,7 @@ static int __send_request(struct request *req)
{
struct vdc_port *port = req->rq_disk->private_data;
struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- struct scatterlist sg[port->ring_cookies];
+ struct scatterlist sg[MAX_RING_COOKIES];
struct vdc_req_entry *rqe;
struct vio_disk_desc *desc;
unsigned int map_perm;
@@ -458,6 +462,9 @@ static int __send_request(struct request *req)
u64 len;
u8 op;
+ if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES))
+ return -EINVAL;
+
map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
if (rq_data_dir(req) == READ) {
@@ -984,9 +991,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
goto err_out_free_port;
port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
- port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
- port->ring_cookies = ((port->max_xfer_size *
- port->vdisk_block_size) / PAGE_SIZE) + 2;
+ port->max_xfer_size = MAX_XFER_SIZE;
+ port->ring_cookies = MAX_RING_COOKIES;
err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
if (err)
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 20142bc77554..282d1af1d3ba 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -293,13 +293,17 @@ static int btsdio_probe(struct sdio_func *func,
tuple = tuple->next;
}
- /* BCM43341 devices soldered onto the PCB (non-removable) use an
- * uart connection for bluetooth, ignore the BT SDIO interface.
+ /* Broadcom devices soldered onto the PCB (non-removable) use an
+ * UART connection for Bluetooth, ignore the BT SDIO interface.
*/
if (func->vendor == SDIO_VENDOR_ID_BROADCOM &&
- func->device == SDIO_DEVICE_ID_BROADCOM_43341 &&
- !mmc_card_is_removable(func->card->host))
- return -ENODEV;
+ !mmc_card_is_removable(func->card->host)) {
+ switch (func->device) {
+ case SDIO_DEVICE_ID_BROADCOM_43341:
+ case SDIO_DEVICE_ID_BROADCOM_43430:
+ return -ENODEV;
+ }
+ }
data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL);
if (!data)
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 61cde1a7ec1b..7439a7eb50ac 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -264,6 +264,7 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
/* QCA ROME chipset */
+ { USB_DEVICE(0x0cf3, 0x535b), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME },
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 9f1392fc7105..f036c8f98ea3 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -40,6 +40,7 @@
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
#include <linux/serdev.h>
+#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -63,6 +64,9 @@
/* susclk rate */
#define SUSCLK_RATE_32KHZ 32768
+/* Controller debug log header */
+#define QCA_DEBUG_HANDLE 0x2EDC
+
/* HCI_IBS transmit side sleep protocol states */
enum tx_ibs_states {
HCI_IBS_TX_ASLEEP,
@@ -849,6 +853,19 @@ static int qca_ibs_wake_ack(struct hci_dev *hdev, struct sk_buff *skb)
return 0;
}
+static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ /* We receive debug logs from chip as an ACL packets.
+ * Instead of sending the data to ACL to decode the
+ * received data, we are pushing them to the above layers
+ * as a diagnostic packet.
+ */
+ if (get_unaligned_le16(skb->data) == QCA_DEBUG_HANDLE)
+ return hci_recv_diag(hdev, skb);
+
+ return hci_recv_frame(hdev, skb);
+}
+
#define QCA_IBS_SLEEP_IND_EVENT \
.type = HCI_IBS_SLEEP_IND, \
.hlen = 0, \
@@ -871,7 +888,7 @@ static int qca_ibs_wake_ack(struct hci_dev *hdev, struct sk_buff *skb)
.maxlen = HCI_MAX_IBS_SIZE
static const struct h4_recv_pkt qca_recv_pkts[] = {
- { H4_RECV_ACL, .recv = hci_recv_frame },
+ { H4_RECV_ACL, .recv = qca_recv_acl_data },
{ H4_RECV_SCO, .recv = hci_recv_frame },
{ H4_RECV_EVENT, .recv = hci_recv_frame },
{ QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind },
diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
index ffa5dac221e4..129ebd2588fd 100644
--- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
+++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c
@@ -1434,8 +1434,16 @@ static void __init sun4i_ccu_init(struct device_node *node,
return;
}
- /* Force the PLL-Audio-1x divider to 1 */
val = readl(reg + SUN4I_PLL_AUDIO_REG);
+
+ /*
+ * Force VCO and PLL bias current to lowest setting. Higher
+ * settings interfere with sigma-delta modulation and result
+ * in audible noise and distortions when using SPDIF or I2S.
+ */
+ val &= ~GENMASK(25, 16);
+
+ /* Force the PLL-Audio-1x divider to 1 */
val &= ~GENMASK(29, 26);
writel(val | (1 << 26), reg + SUN4I_PLL_AUDIO_REG);
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index 04f277cade7c..62249d4ed373 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -237,9 +237,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
static int __init chcr_crypto_init(void)
{
- if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info))
- pr_err("ULD register fail: No chcr crypto support in cxgb4\n");
-
+ cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info);
return 0;
}
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 018fcdb353d2..281cf9cbb44c 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state)
state->crtcs[i].state = NULL;
state->crtcs[i].old_state = NULL;
state->crtcs[i].new_state = NULL;
+
+ if (state->crtcs[i].commit) {
+ drm_crtc_commit_put(state->crtcs[i].commit);
+ state->crtcs[i].commit = NULL;
+ }
}
for (i = 0; i < config->num_total_plane; i++) {
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 80be74df7ba6..1bb4c318bdd4 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
struct drm_atomic_state *old_state)
{
- struct drm_crtc_state *new_crtc_state;
struct drm_crtc *crtc;
int i;
- for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) {
- struct drm_crtc_commit *commit = new_crtc_state->commit;
+ for (i = 0; i < dev->mode_config.num_crtc; i++) {
+ struct drm_crtc_commit *commit = old_state->crtcs[i].commit;
int ret;
- if (!commit)
+ crtc = old_state->crtcs[i].ptr;
+
+ if (!crtc || !commit)
continue;
ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
@@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state,
drm_crtc_commit_get(commit);
commit->abort_completion = true;
+
+ state->crtcs[i].commit = commit;
+ drm_crtc_commit_get(commit);
}
for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) {
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index bae43938c8f6..9cbe8f5c9aca 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -567,9 +567,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
struct drm_mode_crtc *crtc_req = data;
struct drm_crtc *crtc;
struct drm_plane *plane;
- struct drm_connector **connector_set = NULL, *connector;
- struct drm_framebuffer *fb = NULL;
- struct drm_display_mode *mode = NULL;
+ struct drm_connector **connector_set, *connector;
+ struct drm_framebuffer *fb;
+ struct drm_display_mode *mode;
struct drm_mode_set set;
uint32_t __user *set_connectors_ptr;
struct drm_modeset_acquire_ctx ctx;
@@ -598,6 +598,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
mutex_lock(&crtc->dev->mode_config.mutex);
drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
retry:
+ connector_set = NULL;
+ fb = NULL;
+ mode = NULL;
+
ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 3c9fc99648b7..ff0bfc65a8c1 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -113,6 +113,9 @@ static const struct edid_quirk {
/* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
{ "AEO", 0, EDID_QUIRK_FORCE_6BPC },
+ /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */
+ { "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC },
+
/* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */
{ "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC },
@@ -4279,7 +4282,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector,
struct drm_hdmi_info *hdmi = &connector->display_info.hdmi;
dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK;
- hdmi->y420_dc_modes |= dc_mask;
+ hdmi->y420_dc_modes = dc_mask;
}
static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector,
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 515a7aec57ac..9628dd617826 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1580,6 +1580,25 @@ unlock:
}
EXPORT_SYMBOL(drm_fb_helper_ioctl);
+static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1,
+ const struct fb_var_screeninfo *var_2)
+{
+ return var_1->bits_per_pixel == var_2->bits_per_pixel &&
+ var_1->grayscale == var_2->grayscale &&
+ var_1->red.offset == var_2->red.offset &&
+ var_1->red.length == var_2->red.length &&
+ var_1->red.msb_right == var_2->red.msb_right &&
+ var_1->green.offset == var_2->green.offset &&
+ var_1->green.length == var_2->green.length &&
+ var_1->green.msb_right == var_2->green.msb_right &&
+ var_1->blue.offset == var_2->blue.offset &&
+ var_1->blue.length == var_2->blue.length &&
+ var_1->blue.msb_right == var_2->blue.msb_right &&
+ var_1->transp.offset == var_2->transp.offset &&
+ var_1->transp.length == var_2->transp.length &&
+ var_1->transp.msb_right == var_2->transp.msb_right;
+}
+
/**
* drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var
* @var: screeninfo to check
@@ -1590,7 +1609,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
{
struct drm_fb_helper *fb_helper = info->par;
struct drm_framebuffer *fb = fb_helper->fb;
- int depth;
if (var->pixclock != 0 || in_dbg_master())
return -EINVAL;
@@ -1610,72 +1628,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
return -EINVAL;
}
- switch (var->bits_per_pixel) {
- case 16:
- depth = (var->green.length == 6) ? 16 : 15;
- break;
- case 32:
- depth = (var->transp.length > 0) ? 32 : 24;
- break;
- default:
- depth = var->bits_per_pixel;
- break;
- }
-
- switch (depth) {
- case 8:
- var->red.offset = 0;
- var->green.offset = 0;
- var->blue.offset = 0;
- var->red.length = 8;
- var->green.length = 8;
- var->blue.length = 8;
- var->transp.length = 0;
- var->transp.offset = 0;
- break;
- case 15:
- var->red.offset = 10;
- var->green.offset = 5;
- var->blue.offset = 0;
- var->red.length = 5;
- var->green.length = 5;
- var->blue.length = 5;
- var->transp.length = 1;
- var->transp.offset = 15;
- break;
- case 16:
- var->red.offset = 11;
- var->green.offset = 5;
- var->blue.offset = 0;
- var->red.length = 5;
- var->green.length = 6;
- var->blue.length = 5;
- var->transp.length = 0;
- var->transp.offset = 0;
- break;
- case 24:
- var->red.offset = 16;
- var->green.offset = 8;
- var->blue.offset = 0;
- var->red.length = 8;
- var->green.length = 8;
- var->blue.length = 8;
- var->transp.length = 0;
- var->transp.offset = 0;
- break;
- case 32:
- var->red.offset = 16;
- var->green.offset = 8;
- var->blue.offset = 0;
- var->red.length = 8;
- var->green.length = 8;
- var->blue.length = 8;
- var->transp.length = 8;
- var->transp.offset = 24;
- break;
- default:
+ /*
+ * drm fbdev emulation doesn't support changing the pixel format at all,
+ * so reject all pixel format changing requests.
+ */
+ if (!drm_fb_pixel_format_equal(var, &info->var)) {
+ DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n");
return -EINVAL;
}
+
return 0;
}
EXPORT_SYMBOL(drm_fb_helper_check_var);
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index e36004fbe453..2a15f2f9271e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -81,9 +81,19 @@ static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
int i;
for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) {
- unsigned long ideal = rate * i;
+ u64 ideal = (u64)rate * i;
unsigned long rounded;
+ /*
+ * ideal has overflowed the max value that can be stored in an
+ * unsigned long, and every clk operation we might do on a
+ * truncated u64 value will give us incorrect results.
+ * Let's just stop there since bigger dividers will result in
+ * the same overflow issue.
+ */
+ if (ideal > ULONG_MAX)
+ goto out;
+
rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
ideal);
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 9ee9a15e7134..9200e349f29e 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -2270,7 +2270,7 @@ EXPORT_SYMBOL(i2c_put_adapter);
*
* Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO.
* Or a valid pointer to be used with DMA. After use, release it by
- * calling i2c_release_dma_safe_msg_buf().
+ * calling i2c_put_dma_safe_msg_buf().
*
* This function must only be called from process context!
*/
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index faa9e6116b2f..73332b9a25b5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -46,6 +46,8 @@
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
+
#include <linux/uaccess.h>
#include <rdma/ib.h>
@@ -1120,6 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
return -EINVAL;
+ hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table));
if (hdr.in + sizeof(hdr) > len)
return -EINVAL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 21863ddde63e..01d68ed46c1b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -44,6 +44,8 @@
#include <linux/module.h>
#include <linux/nsproxy.h>
+#include <linux/nospec.h>
+
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
@@ -1676,6 +1678,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
return -EINVAL;
+ hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));
if (hdr.in + sizeof(hdr) > len)
return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 088205d7f1a1..cca1820802b8 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -393,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -728,16 +728,11 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
int nent,
int cqe_size)
{
- struct mlx5_frag_buf_ctrl *c = &buf->fbc;
- struct mlx5_frag_buf *frag_buf = &c->frag_buf;
- u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
+ struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
+ u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
+ u8 log_wq_sz = ilog2(cqe_size);
int err;
- MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
- MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
-
- mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
-
err = mlx5_frag_buf_alloc_node(dev->mdev,
nent * cqe_size,
frag_buf,
@@ -745,6 +740,8 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
if (err)
return err;
+ mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
+
buf->cqe_size = cqe_size;
buf->nent = nent;
@@ -934,7 +931,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
- cq->buf.fbc.frag_buf.npages;
+ cq->buf.frag_buf.npages;
*cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
@@ -942,11 +939,11 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.fbc.frag_buf.page_shift -
+ cq->buf.frag_buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
*index = dev->mdev->priv.uar->index;
@@ -1365,11 +1362,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
if (!err) {
- struct mlx5_frag_buf_ctrl *c;
+ struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf;
- c = &cq->resize_buf->fbc;
- npas = c->frag_buf.npages;
- page_shift = c->frag_buf.page_shift;
+ npas = frag_buf->npages;
+ page_shift = frag_buf->page_shift;
}
}
@@ -1390,8 +1386,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
pas, 0);
else
- mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
- pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index f2f11e652dcd..66dc337e49a7 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -284,7 +284,7 @@ static bool devx_is_obj_create_cmd(const void *in)
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
- case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
@@ -627,9 +627,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
break;
- case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
- MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
break;
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5d9b7f62a0ba..af32899bb72a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2793,7 +2793,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
return -EINVAL;
action->flow_tag = ib_spec->flow_tag.tag_id;
- action->has_flow_tag = true;
+ action->flags |= FLOW_ACT_HAS_TAG;
break;
case IB_FLOW_SPEC_ACTION_DROP:
if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -2886,7 +2886,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
+ (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
VALID_SPEC_VALID : VALID_SPEC_INVALID;
}
@@ -3320,15 +3320,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
err = flow_counters_set_data(flow_act.counters, ucmd);
if (err)
goto free;
+ mcounters = to_mcounters(flow_act.counters);
handler->ibcounters = flow_act.counters;
dest_arr[dest_num].type =
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter =
- to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+ dest_arr[dest_num].counter_id =
+ mlx5_fc_id(mcounters->hw_cntrs_hndl);
dest_num++;
}
@@ -3346,7 +3349,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
- if (flow_act.has_flow_tag &&
+ if ((flow_act.flags & FLOW_ACT_HAS_TAG) &&
(flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 320d4dfe8c2f..289c18db2611 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -435,6 +435,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_cq_buf {
struct mlx5_frag_buf_ctrl fbc;
+ struct mlx5_frag_buf frag_buf;
struct ib_umem *umem;
int cqe_size;
int nent;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6cba2a02d11b..daf1eb84cd31 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1279,7 +1279,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (dev->rep)
MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
@@ -1582,7 +1582,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
create_tir:
if (dev->rep)
MLX5_SET(tirc, tirc, self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index f5ae24865355..b0f9d19b3410 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1346,6 +1346,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
{ "ELAN0611", 0 },
{ "ELAN0612", 0 },
{ "ELAN0618", 0 },
+ { "ELAN061C", 0 },
{ "ELAN061D", 0 },
{ "ELAN0622", 0 },
{ "ELAN1000", 0 },
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ee28ec9e0aba..ffa37adb7681 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -963,7 +963,8 @@ static inline void slave_disable_netpoll(struct slave *slave)
return;
slave->np = NULL;
- __netpoll_free_async(np);
+
+ __netpoll_free(np);
}
static void bond_poll_controller(struct net_device *bond_dev)
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 3017ecf82ca5..2eb68769562c 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1190,16 +1190,14 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev)
#ifdef CONFIG_PM_SLEEP
static int bcm_sf2_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+ struct bcm_sf2_priv *priv = dev_get_drvdata(dev);
return dsa_switch_suspend(priv->dev->ds);
}
static int bcm_sf2_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+ struct bcm_sf2_priv *priv = dev_get_drvdata(dev);
return dsa_switch_resume(priv->dev->ds);
}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 78ce820b5257..e05d4eddc935 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2907,7 +2907,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.port_set_link = mv88e6xxx_port_set_link,
.port_set_duplex = mv88e6xxx_port_set_duplex,
.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
- .port_set_speed = mv88e6390_port_set_speed,
+ .port_set_speed = mv88e6341_port_set_speed,
.port_tag_remap = mv88e6095_port_tag_remap,
.port_set_frame_mode = mv88e6351_port_set_frame_mode,
.port_set_egress_floods = mv88e6352_port_set_egress_floods,
@@ -3528,7 +3528,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.port_set_link = mv88e6xxx_port_set_link,
.port_set_duplex = mv88e6xxx_port_set_duplex,
.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
- .port_set_speed = mv88e6390_port_set_speed,
+ .port_set_speed = mv88e6341_port_set_speed,
.port_tag_remap = mv88e6095_port_tag_remap,
.port_set_frame_mode = mv88e6351_port_set_frame_mode,
.port_set_egress_floods = mv88e6352_port_set_egress_floods,
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 92945841c8e8..cd7db60a508b 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -228,8 +228,11 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port,
ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000;
break;
case 2500:
- ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 |
- MV88E6390_PORT_MAC_CTL_ALTSPEED;
+ if (alt_bit)
+ ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 |
+ MV88E6390_PORT_MAC_CTL_ALTSPEED;
+ else
+ ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000;
break;
case 10000:
/* all bits set, fall through... */
@@ -291,6 +294,24 @@ int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
return mv88e6xxx_port_set_speed(chip, port, speed, false, false);
}
+/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6341) */
+int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
+{
+ if (speed == SPEED_MAX)
+ speed = port < 5 ? 1000 : 2500;
+
+ if (speed > 2500)
+ return -EOPNOTSUPP;
+
+ if (speed == 200 && port != 0)
+ return -EOPNOTSUPP;
+
+ if (speed == 2500 && port < 5)
+ return -EOPNOTSUPP;
+
+ return mv88e6xxx_port_set_speed(chip, port, speed, !port, true);
+}
+
/* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */
int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
{
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index f32f56af8e35..36904c9bf955 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -269,6 +269,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup);
int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
+int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index cdcde7f8e0b2..7e97e620bd44 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -955,8 +955,7 @@ qca8k_set_pm(struct qca8k_priv *priv, int enable)
static int qca8k_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct qca8k_priv *priv = platform_get_drvdata(pdev);
+ struct qca8k_priv *priv = dev_get_drvdata(dev);
qca8k_set_pm(priv, 0);
@@ -965,8 +964,7 @@ static int qca8k_suspend(struct device *dev)
static int qca8k_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct qca8k_priv *priv = platform_get_drvdata(pdev);
+ struct qca8k_priv *priv = dev_get_drvdata(dev);
qca8k_set_pm(priv, 1);
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig
index 99b30353541a..9e87d7b8360f 100644
--- a/drivers/net/ethernet/amazon/Kconfig
+++ b/drivers/net/ethernet/amazon/Kconfig
@@ -17,7 +17,7 @@ if NET_VENDOR_AMAZON
config ENA_ETHERNET
tristate "Elastic Network Adapter (ENA) support"
- depends on (PCI_MSI && X86)
+ depends on PCI_MSI && !CPU_BIG_ENDIAN
---help---
This driver supports Elastic Network Adapter (ENA)"
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 284a0a612131..18956e7604a3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3022,20 +3022,10 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev,
int io_sq_num, io_queue_num;
/* In case of LLQ use the llq number in the get feature cmd */
- if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num;
-
- if (io_sq_num == 0) {
- dev_err(&pdev->dev,
- "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n");
-
- ena_dev->tx_mem_queue_type =
- ENA_ADMIN_PLACEMENT_POLICY_HOST;
- io_sq_num = get_feat_ctx->max_queues.max_sq_num;
- }
- } else {
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ io_sq_num = get_feat_ctx->llq.max_llq_num;
+ else
io_sq_num = get_feat_ctx->max_queues.max_sq_num;
- }
io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
io_queue_num = min_t(int, io_queue_num, io_sq_num);
@@ -3238,7 +3228,7 @@ static int ena_calc_queue_size(struct pci_dev *pdev,
if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
queue_size = min_t(u32, queue_size,
- get_feat_ctx->max_queues.max_legacy_llq_depth);
+ get_feat_ctx->llq.max_llq_depth);
queue_size = rounddown_pow_of_two(queue_size);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index c0568465e10b..096ca5730887 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -279,7 +279,7 @@ static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
return err;
}
-int aq_fw2x_update_stats(struct aq_hw_s *self)
+static int aq_fw2x_update_stats(struct aq_hw_s *self)
{
int err = 0;
u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 0fe57e36912b..498b373c992d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1471,7 +1471,7 @@ struct bnxt {
struct rx_port_stats *hw_rx_port_stats;
struct tx_port_stats *hw_tx_port_stats;
struct rx_port_stats_ext *hw_rx_port_stats_ext;
- struct rx_port_stats_ext *hw_tx_port_stats_ext;
+ struct tx_port_stats_ext *hw_tx_port_stats_ext;
dma_addr_t hw_rx_port_stats_map;
dma_addr_t hw_tx_port_stats_map;
dma_addr_t hw_rx_port_stats_ext_map;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index b756fc79424e..35564a8a48f9 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -320,9 +320,12 @@ int bcmgenet_mii_probe(struct net_device *dev)
phydev->advertising = phydev->supported;
/* The internal PHY has its link interrupts routed to the
- * Ethernet MAC ISRs
+ * Ethernet MAC ISRs. On GENETv5 there is a hardware issue
+ * that prevents the signaling of link UP interrupts when
+ * the link operates at 10Mbps, so fallback to polling for
+ * those versions of GENET.
*/
- if (priv->internal_phy)
+ if (priv->internal_phy && !GENET_IS_V5(priv))
dev->phydev->irq = PHY_IGNORE_INTERRUPT;
return 0;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 0acaef3ef548..8f5bf9166c11 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4156,8 +4156,7 @@ static int macb_remove(struct platform_device *pdev)
static int __maybe_unused macb_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *netdev = platform_get_drvdata(pdev);
+ struct net_device *netdev = dev_get_drvdata(dev);
struct macb *bp = netdev_priv(netdev);
netif_carrier_off(netdev);
@@ -4179,8 +4178,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
static int __maybe_unused macb_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *netdev = platform_get_drvdata(pdev);
+ struct net_device *netdev = dev_get_drvdata(dev);
struct macb *bp = netdev_priv(netdev);
if (bp->wol & MACB_WOL_ENABLED) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 4bc211093c98..267322693ed5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -702,15 +702,14 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
* about any presently available devices that support its type. Returns
* %-EBUSY if a ULD of the same type is already registered.
*/
-int cxgb4_register_uld(enum cxgb4_uld type,
- const struct cxgb4_uld_info *p)
+void cxgb4_register_uld(enum cxgb4_uld type,
+ const struct cxgb4_uld_info *p)
{
int ret = 0;
- unsigned int adap_idx = 0;
struct adapter *adap;
if (type >= CXGB4_ULD_MAX)
- return -EINVAL;
+ return;
mutex_lock(&uld_mutex);
list_for_each_entry(adap, &adapter_list, list_node) {
@@ -733,52 +732,29 @@ int cxgb4_register_uld(enum cxgb4_uld type,
}
if (adap->flags & FULL_INIT_DONE)
enable_rx_uld(adap, type);
- if (adap->uld[type].add) {
- ret = -EBUSY;
+ if (adap->uld[type].add)
goto free_irq;
- }
ret = setup_sge_txq_uld(adap, type, p);
if (ret)
goto free_irq;
adap->uld[type] = *p;
uld_attach(adap, type);
- adap_idx++;
- }
- mutex_unlock(&uld_mutex);
- return 0;
-
+ continue;
free_irq:
- if (adap->flags & FULL_INIT_DONE)
- quiesce_rx_uld(adap, type);
- if (adap->flags & USING_MSIX)
- free_msix_queue_irqs_uld(adap, type);
-free_rxq:
- free_sge_queues_uld(adap, type);
-free_queues:
- free_queues_uld(adap, type);
-out:
-
- list_for_each_entry(adap, &adapter_list, list_node) {
- if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
- (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
- continue;
- if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
- continue;
- if (!adap_idx)
- break;
- adap->uld[type].handle = NULL;
- adap->uld[type].add = NULL;
- release_sge_txq_uld(adap, type);
if (adap->flags & FULL_INIT_DONE)
quiesce_rx_uld(adap, type);
if (adap->flags & USING_MSIX)
free_msix_queue_irqs_uld(adap, type);
+free_rxq:
free_sge_queues_uld(adap, type);
+free_queues:
free_queues_uld(adap, type);
- adap_idx--;
+out:
+ dev_warn(adap->pdev_dev,
+ "ULD registration failed for uld type %d\n", type);
}
mutex_unlock(&uld_mutex);
- return ret;
+ return;
}
EXPORT_SYMBOL(cxgb4_register_uld);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index de9ad311dacd..5fa9a2d5fc4b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -384,7 +384,7 @@ struct cxgb4_uld_info {
int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
};
-int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
+void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
int cxgb4_unregister_uld(enum cxgb4_uld type);
int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 50222b7b81f3..0a82fcf16d35 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1722,8 +1722,7 @@ out:
static int
dm9000_drv_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
struct board_info *db;
if (ndev) {
@@ -1745,8 +1744,7 @@ dm9000_drv_suspend(struct device *dev)
static int
dm9000_drv_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
struct board_info *db = netdev_priv(ndev);
if (ndev) {
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 4778b663653e..bf80855dd0dd 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -452,6 +452,10 @@ struct bufdesc_ex {
* initialisation.
*/
#define FEC_QUIRK_MIB_CLEAR (1 << 15)
+/* Only i.MX25/i.MX27/i.MX28 controller supports FRBR,FRSR registers,
+ * those FIFO receive registers are resolved in other platforms.
+ */
+#define FEC_QUIRK_HAS_FRREG (1 << 16)
struct bufdesc_prop {
int qid;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a17cc973d9a3..6db69ba30dcd 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -91,14 +91,16 @@ static struct platform_device_id fec_devtype[] = {
.driver_data = 0,
}, {
.name = "imx25-fec",
- .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR,
+ .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR |
+ FEC_QUIRK_HAS_FRREG,
}, {
.name = "imx27-fec",
- .driver_data = FEC_QUIRK_MIB_CLEAR,
+ .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG,
}, {
.name = "imx28-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
- FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC,
+ FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC |
+ FEC_QUIRK_HAS_FRREG,
}, {
.name = "imx6q-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
@@ -2162,7 +2164,13 @@ static void fec_enet_get_regs(struct net_device *ndev,
memset(buf, 0, regs->len);
for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) {
- off = fec_enet_register_offset[i] / 4;
+ off = fec_enet_register_offset[i];
+
+ if ((off == FEC_R_BOUND || off == FEC_R_FSTART) &&
+ !(fep->quirks & FEC_QUIRK_HAS_FRREG))
+ continue;
+
+ off >>= 2;
buf[off] = readl(&theregs[off]);
}
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index ce93fcce2732..76ce2f21178b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -977,35 +977,28 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb,
}
static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
- int size, dma_addr_t dma, int frag_end,
- enum hns_desc_type type)
+ int size, int frag_end, enum hns_desc_type type)
{
struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
struct hns3_desc *desc = &ring->desc[ring->next_to_use];
+ struct device *dev = ring_to_dev(ring);
u32 ol_type_vlan_len_msec = 0;
u16 bdtp_fe_sc_vld_ra_ri = 0;
+ struct skb_frag_struct *frag;
+ unsigned int frag_buf_num;
u32 type_cs_vlan_tso = 0;
struct sk_buff *skb;
u16 inner_vtag = 0;
u16 out_vtag = 0;
+ unsigned int k;
+ int sizeoflast;
u32 paylen = 0;
+ dma_addr_t dma;
u16 mss = 0;
u8 ol4_proto;
u8 il4_proto;
int ret;
- /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
- desc_cb->priv = priv;
- desc_cb->length = size;
- desc_cb->dma = dma;
- desc_cb->type = type;
-
- /* now, fill the descriptor */
- desc->addr = cpu_to_le64(dma);
- desc->tx.send_size = cpu_to_le16((u16)size);
- hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
- desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
-
if (type == DESC_TYPE_SKB) {
skb = (struct sk_buff *)priv;
paylen = skb->len;
@@ -1046,38 +1039,47 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
desc->tx.mss = cpu_to_le16(mss);
desc->tx.vlan_tag = cpu_to_le16(inner_vtag);
desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag);
- }
- /* move ring pointer to next.*/
- ring_ptr_move_fw(ring, next_to_use);
+ dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+ } else {
+ frag = (struct skb_frag_struct *)priv;
+ dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
+ }
- return 0;
-}
+ if (dma_mapping_error(ring->dev, dma)) {
+ ring->stats.sw_err_cnt++;
+ return -ENOMEM;
+ }
-static int hns3_fill_desc_tso(struct hns3_enet_ring *ring, void *priv,
- int size, dma_addr_t dma, int frag_end,
- enum hns_desc_type type)
-{
- unsigned int frag_buf_num;
- unsigned int k;
- int sizeoflast;
- int ret;
+ desc_cb->length = size;
frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
sizeoflast = size % HNS3_MAX_BD_SIZE;
sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE;
- /* When the frag size is bigger than hardware, split this frag */
+ /* When frag size is bigger than hardware limit, split this frag */
for (k = 0; k < frag_buf_num; k++) {
- ret = hns3_fill_desc(ring, priv,
- (k == frag_buf_num - 1) ?
- sizeoflast : HNS3_MAX_BD_SIZE,
- dma + HNS3_MAX_BD_SIZE * k,
- frag_end && (k == frag_buf_num - 1) ? 1 : 0,
- (type == DESC_TYPE_SKB && !k) ?
- DESC_TYPE_SKB : DESC_TYPE_PAGE);
- if (ret)
- return ret;
+ /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
+ desc_cb->priv = priv;
+ desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k;
+ desc_cb->type = (type == DESC_TYPE_SKB && !k) ?
+ DESC_TYPE_SKB : DESC_TYPE_PAGE;
+
+ /* now, fill the descriptor */
+ desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k);
+ desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ?
+ (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE);
+ hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri,
+ frag_end && (k == frag_buf_num - 1) ?
+ 1 : 0);
+ desc->tx.bdtp_fe_sc_vld_ra_ri =
+ cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+
+ /* move ring pointer to next.*/
+ ring_ptr_move_fw(ring, next_to_use);
+
+ desc_cb = &ring->desc_cb[ring->next_to_use];
+ desc = &ring->desc[ring->next_to_use];
}
return 0;
@@ -1133,7 +1135,7 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum,
return 0;
}
-static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
+static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
{
struct device *dev = ring_to_dev(ring);
unsigned int i;
@@ -1149,12 +1151,14 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
ring->desc_cb[ring->next_to_use].dma,
ring->desc_cb[ring->next_to_use].length,
DMA_TO_DEVICE);
- else
+ else if (ring->desc_cb[ring->next_to_use].length)
dma_unmap_page(dev,
ring->desc_cb[ring->next_to_use].dma,
ring->desc_cb[ring->next_to_use].length,
DMA_TO_DEVICE);
+ ring->desc_cb[ring->next_to_use].length = 0;
+
/* rollback one */
ring_ptr_move_bw(ring, next_to_use);
}
@@ -1166,12 +1170,10 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
struct hns3_nic_ring_data *ring_data =
&tx_ring_data(priv, skb->queue_mapping);
struct hns3_enet_ring *ring = ring_data->ring;
- struct device *dev = priv->dev;
struct netdev_queue *dev_queue;
struct skb_frag_struct *frag;
int next_to_use_head;
int next_to_use_frag;
- dma_addr_t dma;
int buf_num;
int seg_num;
int size;
@@ -1206,35 +1208,23 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
next_to_use_head = ring->next_to_use;
- dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
- if (dma_mapping_error(dev, dma)) {
- netdev_err(netdev, "TX head DMA map failed\n");
- ring->stats.sw_err_cnt++;
- goto out_err_tx_ok;
- }
-
- ret = priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0,
- DESC_TYPE_SKB);
+ ret = priv->ops.fill_desc(ring, skb, size, seg_num == 1 ? 1 : 0,
+ DESC_TYPE_SKB);
if (ret)
- goto head_dma_map_err;
+ goto head_fill_err;
next_to_use_frag = ring->next_to_use;
/* Fill the fragments */
for (i = 1; i < seg_num; i++) {
frag = &skb_shinfo(skb)->frags[i - 1];
size = skb_frag_size(frag);
- dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
- if (dma_mapping_error(dev, dma)) {
- netdev_err(netdev, "TX frag(%d) DMA map failed\n", i);
- ring->stats.sw_err_cnt++;
- goto frag_dma_map_err;
- }
- ret = priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma,
- seg_num - 1 == i ? 1 : 0,
- DESC_TYPE_PAGE);
+
+ ret = priv->ops.fill_desc(ring, frag, size,
+ seg_num - 1 == i ? 1 : 0,
+ DESC_TYPE_PAGE);
if (ret)
- goto frag_dma_map_err;
+ goto frag_fill_err;
}
/* Complete translate all packets */
@@ -1247,11 +1237,11 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
-frag_dma_map_err:
- hns_nic_dma_unmap(ring, next_to_use_frag);
+frag_fill_err:
+ hns3_clear_desc(ring, next_to_use_frag);
-head_dma_map_err:
- hns_nic_dma_unmap(ring, next_to_use_head);
+head_fill_err:
+ hns3_clear_desc(ring, next_to_use_head);
out_err_tx_ok:
dev_kfree_skb_any(skb);
@@ -1313,13 +1303,10 @@ static int hns3_nic_set_features(struct net_device *netdev,
int ret;
if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
- if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
- priv->ops.fill_desc = hns3_fill_desc_tso;
+ if (features & (NETIF_F_TSO | NETIF_F_TSO6))
priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
- } else {
- priv->ops.fill_desc = hns3_fill_desc;
+ else
priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
- }
}
if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) &&
@@ -1890,7 +1877,7 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
if (cb->type == DESC_TYPE_SKB)
dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
ring_to_dma_dir(ring));
- else
+ else if (cb->length)
dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
ring_to_dma_dir(ring));
}
@@ -3247,14 +3234,12 @@ static void hns3_nic_set_priv_ops(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
+ priv->ops.fill_desc = hns3_fill_desc;
if ((netdev->features & NETIF_F_TSO) ||
- (netdev->features & NETIF_F_TSO6)) {
- priv->ops.fill_desc = hns3_fill_desc_tso;
+ (netdev->features & NETIF_F_TSO6))
priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
- } else {
- priv->ops.fill_desc = hns3_fill_desc;
+ else
priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
- }
}
static int hns3_client_init(struct hnae3_handle *handle)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index f25b281ff2aa..71cfca132d0b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -419,8 +419,7 @@ struct hns3_nic_ring_data {
struct hns3_nic_ops {
int (*fill_desc)(struct hns3_enet_ring *ring, void *priv,
- int size, dma_addr_t dma, int frag_end,
- enum hns_desc_type type);
+ int size, int frag_end, enum hns_desc_type type);
int (*maybe_stop_tx)(struct sk_buff **out_skb,
int *bnum, struct hns3_enet_ring *ring);
void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 0f5563f3b779..097b5502603f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -58,6 +58,8 @@ enum hinic_port_cmd {
HINIC_PORT_CMD_GET_GLOBAL_QPN = 102,
+ HINIC_PORT_CMD_SET_TSO = 112,
+
HINIC_PORT_CMD_GET_CAP = 170,
};
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index cb239627770f..967c993d5303 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -70,8 +70,6 @@
#define SQ_MASKED_IDX(sq, idx) ((idx) & (sq)->wq->mask)
#define RQ_MASKED_IDX(rq, idx) ((idx) & (rq)->wq->mask)
-#define TX_MAX_MSS_DEFAULT 0x3E00
-
enum sq_wqe_type {
SQ_NORMAL_WQE = 0,
};
@@ -494,33 +492,16 @@ static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
HINIC_SQ_CTRL_SET(ctrl_size, LEN);
- ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
- QUEUE_INFO_MSS);
+ ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
+ QUEUE_INFO_MSS) |
+ HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
}
static void sq_prepare_task(struct hinic_sq_task *task)
{
- task->pkt_info0 =
- HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
- HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
- HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
- INNER_L3TYPE) |
- HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
- VLAN_OFFLOAD) |
- HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
-
- task->pkt_info1 =
- HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
- HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
- HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
-
- task->pkt_info2 =
- HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
- HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN) |
- HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
- TUNNEL_L4TYPE) |
- HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
- OUTER_L3TYPE);
+ task->pkt_info0 = 0;
+ task->pkt_info1 = 0;
+ task->pkt_info2 = 0;
task->ufo_v6_identify = 0;
@@ -529,6 +510,86 @@ static void sq_prepare_task(struct hinic_sq_task *task)
task->zero_pad = 0;
}
+void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
+{
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
+}
+
+void hinic_task_set_outter_l3(struct hinic_sq_task *task,
+ enum hinic_l3_offload_type l3_type,
+ u32 network_len)
+{
+ task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
+ HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
+}
+
+void hinic_task_set_inner_l3(struct hinic_sq_task *task,
+ enum hinic_l3_offload_type l3_type,
+ u32 network_len)
+{
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
+ task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
+}
+
+void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
+ enum hinic_l4_offload_type l4_type,
+ u32 tunnel_len)
+{
+ task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
+ HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
+}
+
+void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
+ enum hinic_l4_offload_type l4_offload,
+ u32 l4_len, u32 offset)
+{
+ u32 tcp_udp_cs = 0, sctp = 0;
+ u32 mss = HINIC_MSS_DEFAULT;
+
+ if (l4_offload == TCP_OFFLOAD_ENABLE ||
+ l4_offload == UDP_OFFLOAD_ENABLE)
+ tcp_udp_cs = 1;
+ else if (l4_offload == SCTP_OFFLOAD_ENABLE)
+ sctp = 1;
+
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
+ task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
+
+ *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
+ HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
+ HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
+
+ *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+ *queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
+}
+
+void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
+ enum hinic_l4_offload_type l4_offload,
+ u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
+{
+ u32 tso = 0, ufo = 0;
+
+ if (l4_offload == TCP_OFFLOAD_ENABLE)
+ tso = 1;
+ else if (l4_offload == UDP_OFFLOAD_ENABLE)
+ ufo = 1;
+
+ task->ufo_v6_identify = ip_ident;
+
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
+ task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
+ task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
+
+ *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
+ HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
+ HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
+ HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
+
+ /* set MSS value */
+ *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+ *queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
+}
+
/**
* hinic_sq_prepare_wqe - prepare wqe before insert to the queue
* @sq: send queue
@@ -613,6 +674,16 @@ struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
}
/**
+ * hinic_sq_return_wqe - return the wqe to the sq
+ * @sq: send queue
+ * @wqe_size: the size of the wqe
+ **/
+void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
+{
+ hinic_return_wqe(sq->wq, wqe_size);
+}
+
+/**
* hinic_sq_write_wqe - write the wqe to the sq
* @sq: send queue
* @prod_idx: pi of the wqe
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index 6c84f83ec283..a0dc63a4bfc7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -149,6 +149,31 @@ int hinic_get_sq_free_wqebbs(struct hinic_sq *sq);
int hinic_get_rq_free_wqebbs(struct hinic_rq *rq);
+void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len);
+
+void hinic_task_set_outter_l3(struct hinic_sq_task *task,
+ enum hinic_l3_offload_type l3_type,
+ u32 network_len);
+
+void hinic_task_set_inner_l3(struct hinic_sq_task *task,
+ enum hinic_l3_offload_type l3_type,
+ u32 network_len);
+
+void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
+ enum hinic_l4_offload_type l4_type,
+ u32 tunnel_len);
+
+void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
+ u32 *queue_info,
+ enum hinic_l4_offload_type l4_offload,
+ u32 l4_len, u32 offset);
+
+void hinic_set_tso_inner_l4(struct hinic_sq_task *task,
+ u32 *queue_info,
+ enum hinic_l4_offload_type l4_offload,
+ u32 l4_len,
+ u32 offset, u32 ip_ident, u32 mss);
+
void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
int nr_sges);
@@ -159,6 +184,8 @@ void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
unsigned int wqe_size, u16 *prod_idx);
+void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size);
+
void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
struct hinic_sq_wqe *wqe, struct sk_buff *skb,
unsigned int wqe_size);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index 3e3181c089bd..f92f1bf3901a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -775,6 +775,20 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
}
/**
+ * hinic_return_wqe - return the wqe when transmit failed
+ * @wq: wq to return wqe
+ * @wqe_size: wqe size
+ **/
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size)
+{
+ int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+
+ atomic_sub(num_wqebbs, &wq->prod_idx);
+
+ atomic_add(num_wqebbs, &wq->delta);
+}
+
+/**
* hinic_put_wqe - return the wqe place to use for a new wqe
* @wq: wq to return wqe
* @wqe_size: wqe size
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
index 9c030a0f035e..9b66545ba563 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
@@ -104,6 +104,8 @@ void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq);
struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
u16 *prod_idx);
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size);
+
void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size);
struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
index bc73485483c5..9754d6ed5f4a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -62,19 +62,33 @@
(((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \
& HINIC_CMDQ_WQE_HEADER_##member##_MASK)
-#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0
-#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16
-#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22
-#define HINIC_SQ_CTRL_LEN_SHIFT 29
-
-#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF
-#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F
-#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1
-#define HINIC_SQ_CTRL_LEN_MASK 0x3
-
-#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13
-
-#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0
+#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16
+#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22
+#define HINIC_SQ_CTRL_LEN_SHIFT 29
+
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF
+#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F
+#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1
+#define HINIC_SQ_CTRL_LEN_MASK 0x3
+
+#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT 2
+#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT 10
+#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT 11
+#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT 12
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13
+#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT 27
+#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT 28
+#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT 29
+
+#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK 0xFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK 0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK 0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK 0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK 0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK 0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK 0x7
#define HINIC_SQ_CTRL_SET(val, member) \
(((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \
@@ -84,6 +98,10 @@
(((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \
& HINIC_SQ_CTRL_##member##_MASK)
+#define HINIC_SQ_CTRL_CLEAR(val, member) \
+ ((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \
+ << HINIC_SQ_CTRL_##member##_SHIFT)))
+
#define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT 0
#define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT 8
#define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT 10
@@ -108,28 +126,28 @@
/* 8 bits reserved */
#define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT 8
-#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT 16
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT 24
+#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT 16
+#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT 24
/* 8 bits reserved */
#define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK 0xFF
-#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK 0xFF
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK 0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK 0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK 0xFF
#define HINIC_SQ_TASK_INFO1_SET(val, member) \
(((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) << \
HINIC_SQ_TASK_INFO1_##member##_SHIFT)
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0
-#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT 12
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT 0
+#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT 8
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16
/* 1 bit reserved */
-#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 22
+#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 24
/* 8 bits reserved */
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK 0xFFF
-#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK 0x7F
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x3
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK 0xFF
+#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK 0xFF
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x7
/* 1 bit reserved */
#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK 0x3
/* 8 bits reserved */
@@ -187,12 +205,15 @@
sizeof(struct hinic_sq_task) + \
(nr_sges) * sizeof(struct hinic_sq_bufdesc))
-#define HINIC_SCMD_DATA_LEN 16
+#define HINIC_SCMD_DATA_LEN 16
+
+#define HINIC_MAX_SQ_BUFDESCS 17
-#define HINIC_MAX_SQ_BUFDESCS 17
+#define HINIC_SQ_WQE_MAX_SIZE 320
+#define HINIC_RQ_WQE_SIZE 32
-#define HINIC_SQ_WQE_MAX_SIZE 320
-#define HINIC_RQ_WQE_SIZE 32
+#define HINIC_MSS_DEFAULT 0x3E00
+#define HINIC_MSS_MIN 0x50
enum hinic_l4offload_type {
HINIC_L4_OFF_DISABLE = 0,
@@ -211,6 +232,26 @@ enum hinic_pkt_parsed {
HINIC_PKT_PARSED = 1,
};
+enum hinic_l3_offload_type {
+ L3TYPE_UNKNOWN = 0,
+ IPV6_PKT = 1,
+ IPV4_PKT_NO_CHKSUM_OFFLOAD = 2,
+ IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3,
+};
+
+enum hinic_l4_offload_type {
+ OFFLOAD_DISABLE = 0,
+ TCP_OFFLOAD_ENABLE = 1,
+ SCTP_OFFLOAD_ENABLE = 2,
+ UDP_OFFLOAD_ENABLE = 3,
+};
+
+enum hinic_l4_tunnel_type {
+ NOT_TUNNEL,
+ TUNNEL_UDP_NO_CSUM,
+ TUNNEL_UDP_CSUM,
+};
+
enum hinic_outer_l3type {
HINIC_OUTER_L3TYPE_UNKNOWN = 0,
HINIC_OUTER_L3TYPE_IPV6 = 1,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 4a8f82938ed5..fdf2bdb6b0d0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -805,7 +805,8 @@ static const struct net_device_ops hinic_netdev_ops = {
static void netdev_features_init(struct net_device *netdev)
{
- netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
+ netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
netdev->vlan_features = netdev->hw_features;
@@ -863,6 +864,20 @@ static void link_status_event_handler(void *handle, void *buf_in, u16 in_size,
*out_size = sizeof(*ret_link_status);
}
+static int set_features(struct hinic_dev *nic_dev,
+ netdev_features_t pre_features,
+ netdev_features_t features, bool force_change)
+{
+ netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
+ int err = 0;
+
+ if (changed & NETIF_F_TSO)
+ err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
+ HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
+
+ return err;
+}
+
/**
* nic_dev_init - Initialize the NIC device
* @pdev: the NIC pci device
@@ -963,7 +978,12 @@ static int nic_dev_init(struct pci_dev *pdev)
hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
nic_dev, link_status_event_handler);
+ err = set_features(nic_dev, 0, nic_dev->netdev->features, true);
+ if (err)
+ goto err_set_features;
+
SET_NETDEV_DEV(netdev, &pdev->dev);
+
err = register_netdev(netdev);
if (err) {
dev_err(&pdev->dev, "Failed to register netdev\n");
@@ -973,6 +993,7 @@ static int nic_dev_init(struct pci_dev *pdev)
return 0;
err_reg_netdev:
+err_set_features:
hinic_hwdev_cb_unregister(nic_dev->hwdev,
HINIC_MGMT_MSG_CMD_LINK_STATUS);
cancel_work_sync(&rx_mode_work->work);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 4d4e3f05fb5f..7575a7d3bd9f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -377,3 +377,35 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
return 0;
}
+
+/**
+ * hinic_port_set_tso - set port tso configuration
+ * @nic_dev: nic device
+ * @state: the tso state to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct hinic_tso_config tso_cfg = {0};
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+ tso_cfg.tso_en = state;
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO,
+ &tso_cfg, sizeof(tso_cfg),
+ &tso_cfg, &out_size);
+ if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) {
+ dev_err(&pdev->dev,
+ "Failed to set port tso, ret = %d\n",
+ tso_cfg.status);
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 9404365195dd..f6e3220fe28f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -72,6 +72,11 @@ enum hinic_speed {
HINIC_SPEED_UNKNOWN = 0xFF,
};
+enum hinic_tso_state {
+ HINIC_TSO_DISABLE = 0,
+ HINIC_TSO_ENABLE = 1,
+};
+
struct hinic_port_mac_cmd {
u8 status;
u8 version;
@@ -167,6 +172,17 @@ struct hinic_port_cap {
u8 rsvd2[3];
};
+struct hinic_tso_config {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_id;
+ u16 rsvd1;
+ u8 tso_en;
+ u8 resv2[3];
+};
+
int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
u16 vlan_id);
@@ -195,4 +211,6 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev,
int hinic_port_get_cap(struct hinic_dev *nic_dev,
struct hinic_port_cap *port_cap);
+int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
+
#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index c5fca0356c9c..11e73e67358d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -26,6 +26,13 @@
#include <linux/skbuff.h>
#include <linux/smp.h>
#include <asm/byteorder.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
#include "hinic_common.h"
#include "hinic_hw_if.h"
@@ -45,9 +52,31 @@
#define CI_UPDATE_NO_PENDING 0
#define CI_UPDATE_NO_COALESC 0
-#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
+#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
-#define MIN_SKB_LEN 64
+#define MIN_SKB_LEN 17
+
+#define MAX_PAYLOAD_OFFSET 221
+#define TRANSPORT_OFFSET(l4_hdr, skb) ((u32)((l4_hdr) - (skb)->data))
+
+union hinic_l3 {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+};
+
+union hinic_l4 {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ unsigned char *hdr;
+};
+
+enum hinic_offload_type {
+ TX_OFFLOAD_TSO = BIT(0),
+ TX_OFFLOAD_CSUM = BIT(1),
+ TX_OFFLOAD_VLAN = BIT(2),
+ TX_OFFLOAD_INVALID = BIT(3),
+};
/**
* hinic_txq_clean_stats - Clean the statistics of specific queue
@@ -175,18 +204,263 @@ static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
DMA_TO_DEVICE);
}
+static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip,
+ union hinic_l4 *l4,
+ enum hinic_offload_type offload_type,
+ enum hinic_l3_offload_type *l3_type,
+ u8 *l4_proto)
+{
+ u8 *exthdr;
+
+ if (ip->v4->version == 4) {
+ *l3_type = (offload_type == TX_OFFLOAD_CSUM) ?
+ IPV4_PKT_NO_CHKSUM_OFFLOAD :
+ IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+ *l4_proto = ip->v4->protocol;
+ } else if (ip->v4->version == 6) {
+ *l3_type = IPV6_PKT;
+ exthdr = ip->hdr + sizeof(*ip->v6);
+ *l4_proto = ip->v6->nexthdr;
+ if (exthdr != l4->hdr) {
+ int start = exthdr - skb->data;
+ __be16 frag_off;
+
+ ipv6_skip_exthdr(skb, start, l4_proto, &frag_off);
+ }
+ } else {
+ *l3_type = L3TYPE_UNKNOWN;
+ *l4_proto = 0;
+ }
+}
+
+static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4,
+ enum hinic_offload_type offload_type, u8 l4_proto,
+ enum hinic_l4_offload_type *l4_offload,
+ u32 *l4_len, u32 *offset)
+{
+ *l4_offload = OFFLOAD_DISABLE;
+ *offset = 0;
+ *l4_len = 0;
+
+ switch (l4_proto) {
+ case IPPROTO_TCP:
+ *l4_offload = TCP_OFFLOAD_ENABLE;
+ /* doff in unit of 4B */
+ *l4_len = l4->tcp->doff * 4;
+ *offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb);
+ break;
+
+ case IPPROTO_UDP:
+ *l4_offload = UDP_OFFLOAD_ENABLE;
+ *l4_len = sizeof(struct udphdr);
+ *offset = TRANSPORT_OFFSET(l4->hdr, skb);
+ break;
+
+ case IPPROTO_SCTP:
+ /* only csum offload support sctp */
+ if (offload_type != TX_OFFLOAD_CSUM)
+ break;
+
+ *l4_offload = SCTP_OFFLOAD_ENABLE;
+ *l4_len = sizeof(struct sctphdr);
+ *offset = TRANSPORT_OFFSET(l4->hdr, skb);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto)
+{
+ return (ip->v4->version == 4) ?
+ csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
+ csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
+}
+
+static int offload_tso(struct hinic_sq_task *task, u32 *queue_info,
+ struct sk_buff *skb)
+{
+ u32 offset, l4_len, ip_identify, network_hdr_len;
+ enum hinic_l3_offload_type l3_offload;
+ enum hinic_l4_offload_type l4_offload;
+ union hinic_l3 ip;
+ union hinic_l4 l4;
+ u8 l4_proto;
+
+ if (!skb_is_gso(skb))
+ return 0;
+
+ if (skb_cow_head(skb, 0) < 0)
+ return -EPROTONOSUPPORT;
+
+ if (skb->encapsulation) {
+ u32 gso_type = skb_shinfo(skb)->gso_type;
+ u32 tunnel_type = 0;
+ u32 l4_tunnel_len;
+
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+ network_hdr_len = skb_inner_network_header_len(skb);
+
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+ } else if (ip.v4->version == 6) {
+ l3_offload = IPV6_PKT;
+ } else {
+ l3_offload = 0;
+ }
+
+ hinic_task_set_outter_l3(task, l3_offload,
+ skb_network_header_len(skb));
+
+ if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
+ l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
+ tunnel_type = TUNNEL_UDP_CSUM;
+ } else if (gso_type & SKB_GSO_UDP_TUNNEL) {
+ tunnel_type = TUNNEL_UDP_NO_CSUM;
+ }
+
+ l4_tunnel_len = skb_inner_network_offset(skb) -
+ skb_transport_offset(skb);
+ hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
+
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_inner_transport_header(skb);
+ } else {
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+ network_hdr_len = skb_network_header_len(skb);
+ }
+
+ /* initialize inner IP header fields */
+ if (ip.v4->version == 4)
+ ip.v4->tot_len = 0;
+ else
+ ip.v6->payload_len = 0;
+
+ get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload,
+ &l4_proto);
+
+ hinic_task_set_inner_l3(task, l3_offload, network_hdr_len);
+
+ ip_identify = 0;
+ if (l4_proto == IPPROTO_TCP)
+ l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);
+
+ get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload,
+ &l4_len, &offset);
+
+ hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset,
+ ip_identify, skb_shinfo(skb)->gso_size);
+
+ return 1;
+}
+
+static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
+ struct sk_buff *skb)
+{
+ enum hinic_l4_offload_type l4_offload;
+ u32 offset, l4_len, network_hdr_len;
+ enum hinic_l3_offload_type l3_type;
+ union hinic_l3 ip;
+ union hinic_l4 l4;
+ u8 l4_proto;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ if (skb->encapsulation) {
+ u32 l4_tunnel_len;
+
+ ip.hdr = skb_network_header(skb);
+
+ if (ip.v4->version == 4)
+ l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
+ else if (ip.v4->version == 6)
+ l3_type = IPV6_PKT;
+ else
+ l3_type = L3TYPE_UNKNOWN;
+
+ hinic_task_set_outter_l3(task, l3_type,
+ skb_network_header_len(skb));
+
+ l4_tunnel_len = skb_inner_network_offset(skb) -
+ skb_transport_offset(skb);
+
+ hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
+ l4_tunnel_len);
+
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_inner_transport_header(skb);
+ network_hdr_len = skb_inner_network_header_len(skb);
+ } else {
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+ network_hdr_len = skb_network_header_len(skb);
+ }
+
+ get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type,
+ &l4_proto);
+
+ hinic_task_set_inner_l3(task, l3_type, network_hdr_len);
+
+ get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload,
+ &l4_len, &offset);
+
+ hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset);
+
+ return 1;
+}
+
+static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
+ u32 *queue_info)
+{
+ enum hinic_offload_type offload = 0;
+ int enabled;
+
+ enabled = offload_tso(task, queue_info, skb);
+ if (enabled > 0) {
+ offload |= TX_OFFLOAD_TSO;
+ } else if (enabled == 0) {
+ enabled = offload_csum(task, queue_info, skb);
+ if (enabled)
+ offload |= TX_OFFLOAD_CSUM;
+ } else {
+ return -EPROTONOSUPPORT;
+ }
+
+ if (offload)
+ hinic_task_set_l2hdr(task, skb_network_offset(skb));
+
+ /* payload offset should not more than 221 */
+ if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) >
+ MAX_PAYLOAD_OFFSET) {
+ return -EPROTONOSUPPORT;
+ }
+
+ /* mss should not less than 80 */
+ if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) {
+ *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+ *queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
+ }
+
+ return 0;
+}
+
netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
struct hinic_dev *nic_dev = netdev_priv(netdev);
+ u16 prod_idx, q_id = skb->queue_mapping;
struct netdev_queue *netdev_txq;
int nr_sges, err = NETDEV_TX_OK;
struct hinic_sq_wqe *sq_wqe;
unsigned int wqe_size;
struct hinic_txq *txq;
struct hinic_qp *qp;
- u16 prod_idx;
- txq = &nic_dev->txqs[skb->queue_mapping];
+ txq = &nic_dev->txqs[q_id];
qp = container_of(txq->sq, struct hinic_qp, sq);
if (skb->len < MIN_SKB_LEN) {
@@ -236,15 +510,23 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
process_sq_wqe:
hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
+ err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
+ if (err)
+ goto offload_error;
+
hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
flush_skbs:
- netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
+ netdev_txq = netdev_get_tx_queue(netdev, q_id);
if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
return err;
+offload_error:
+ hinic_sq_return_wqe(txq->sq, wqe_size);
+ tx_unmap_skb(nic_dev, skb, txq->sges);
+
skb_error:
dev_kfree_skb_any(skb);
@@ -252,7 +534,8 @@ update_error_stats:
u64_stats_update_begin(&txq->txq_stats.syncp);
txq->txq_stats.tx_dropped++;
u64_stats_update_end(&txq->txq_stats.syncp);
- return err;
+
+ return NETDEV_TX_OK;
}
/**
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index b542aba6f0e8..fd3373d82a9e 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -68,6 +68,9 @@ config E1000E
<http://support.intel.com>
+ More specific information on configuring the driver is in
+ <file:Documentation/networking/e1000e.rst>.
+
To compile this driver as a module, choose M here. The module
will be called e1000e.
@@ -94,7 +97,7 @@ config IGB
<http://support.intel.com>
More specific information on configuring the driver is in
- <file:Documentation/networking/e1000.rst>.
+ <file:Documentation/networking/igb.rst>.
To compile this driver as a module, choose M here. The module
will be called igb.
@@ -130,7 +133,7 @@ config IGBVF
<http://support.intel.com>
More specific information on configuring the driver is in
- <file:Documentation/networking/e1000.rst>.
+ <file:Documentation/networking/igbvf.rst>.
To compile this driver as a module, choose M here. The module
will be called igbvf.
@@ -147,7 +150,7 @@ config IXGB
<http://support.intel.com>
More specific information on configuring the driver is in
- <file:Documentation/networking/ixgb.txt>.
+ <file:Documentation/networking/ixgb.rst>.
To compile this driver as a module, choose M here. The module
will be called ixgb.
@@ -164,6 +167,9 @@ config IXGBE
<http://support.intel.com>
+ More specific information on configuring the driver is in
+ <file:Documentation/networking/ixgbe.rst>.
+
To compile this driver as a module, choose M here. The module
will be called ixgbe.
@@ -205,7 +211,7 @@ config IXGBEVF
<http://support.intel.com>
More specific information on configuring the driver is in
- <file:Documentation/networking/ixgbevf.txt>.
+ <file:Documentation/networking/ixgbevf.rst>.
To compile this driver as a module, choose M here. The module
will be called ixgbevf. MSI-X interrupt support is required
@@ -222,6 +228,9 @@ config I40E
<http://support.intel.com>
+ More specific information on configuring the driver is in
+ <file:Documentation/networking/i40e.rst>.
+
To compile this driver as a module, choose M here. The module
will be called i40e.
@@ -254,6 +263,9 @@ config I40EVF
This driver was formerly named i40evf.
+ More specific information on configuring the driver is in
+ <file:Documentation/networking/iavf.rst>.
+
To compile this driver as a module, choose M here. The module
will be called iavf. MSI-X interrupt support is required
for this driver to work correctly.
@@ -269,6 +281,9 @@ config ICE
<http://support.intel.com>
+ More specific information on configuring the driver is in
+ <file:Documentation/networking/ice.rst>.
+
To compile this driver as a module, choose M here. The module
will be called ice.
@@ -284,7 +299,26 @@ config FM10K
<http://support.intel.com>
+ More specific information on configuring the driver is in
+ <file:Documentation/networking/fm10k.rst>.
+
To compile this driver as a module, choose M here. The module
will be called fm10k. MSI-X interrupt support is required
+config IGC
+ tristate "Intel(R) Ethernet Controller I225-LM/I225-V support"
+ default n
+ depends on PCI
+ ---help---
+ This driver supports Intel(R) Ethernet Controller I225-LM/I225-V
+ family of adapters.
+
+ For more information on how to identify your adapter, go
+ to the Adapter & Driver ID Guide that can be located at:
+
+ <http://support.intel.com>
+
+ To compile this driver as a module, choose M here. The module
+ will be called igc.
+
endif # NET_VENDOR_INTEL
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index b91153df6ee8..3075290063f6 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_E100) += e100.o
obj-$(CONFIG_E1000) += e1000/
obj-$(CONFIG_E1000E) += e1000e/
obj-$(CONFIG_IGB) += igb/
+obj-$(CONFIG_IGC) += igc/
obj-$(CONFIG_IGBVF) += igbvf/
obj-$(CONFIG_IXGBE) += ixgbe/
obj-$(CONFIG_IXGBEVF) += ixgbevf/
diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
new file mode 100644
index 000000000000..4387f6ba8e67
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Intel Corporation
+
+#
+# Intel(R) I225-LM/I225-V 2.5G Ethernet Controller
+#
+
+obj-$(CONFIG_IGC) += igc.o
+
+igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
new file mode 100644
index 000000000000..cdf18a5d9e08
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -0,0 +1,443 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_H_
+#define _IGC_H_
+
+#include <linux/kobject.h>
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#include <linux/ethtool.h>
+
+#include <linux/sctp.h>
+
+#define IGC_ERR(args...) pr_err("igc: " args)
+
+#define PFX "igc: "
+
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+
+#include "igc_hw.h"
+
+/* main */
+extern char igc_driver_name[];
+extern char igc_driver_version[];
+
+/* Interrupt defines */
+#define IGC_START_ITR 648 /* ~6000 ints/sec */
+#define IGC_FLAG_HAS_MSI BIT(0)
+#define IGC_FLAG_QUEUE_PAIRS BIT(4)
+#define IGC_FLAG_NEED_LINK_UPDATE BIT(9)
+#define IGC_FLAG_MEDIA_RESET BIT(10)
+#define IGC_FLAG_MAS_ENABLE BIT(12)
+#define IGC_FLAG_HAS_MSIX BIT(13)
+#define IGC_FLAG_VLAN_PROMISC BIT(15)
+
+#define IGC_START_ITR 648 /* ~6000 ints/sec */
+#define IGC_4K_ITR 980
+#define IGC_20K_ITR 196
+#define IGC_70K_ITR 56
+
+#define IGC_DEFAULT_ITR 3 /* dynamic */
+#define IGC_MAX_ITR_USECS 10000
+#define IGC_MIN_ITR_USECS 10
+#define NON_Q_VECTORS 1
+#define MAX_MSIX_ENTRIES 10
+
+/* TX/RX descriptor defines */
+#define IGC_DEFAULT_TXD 256
+#define IGC_DEFAULT_TX_WORK 128
+#define IGC_MIN_TXD 80
+#define IGC_MAX_TXD 4096
+
+#define IGC_DEFAULT_RXD 256
+#define IGC_MIN_RXD 80
+#define IGC_MAX_RXD 4096
+
+/* Transmit and receive queues */
+#define IGC_MAX_RX_QUEUES 4
+#define IGC_MAX_TX_QUEUES 4
+
+#define MAX_Q_VECTORS 8
+#define MAX_STD_JUMBO_FRAME_SIZE 9216
+
+/* Supported Rx Buffer Sizes */
+#define IGC_RXBUFFER_256 256
+#define IGC_RXBUFFER_2048 2048
+#define IGC_RXBUFFER_3072 3072
+
+#define IGC_RX_HDR_LEN IGC_RXBUFFER_256
+
+/* RX and TX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ * descriptors available in its onboard memory.
+ * Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ * available in host memory.
+ * If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ * descriptors until either it has this many to write back, or the
+ * ITR timer expires.
+ */
+#define IGC_RX_PTHRESH 8
+#define IGC_RX_HTHRESH 8
+#define IGC_TX_PTHRESH 8
+#define IGC_TX_HTHRESH 1
+#define IGC_RX_WTHRESH 4
+#define IGC_TX_WTHRESH 16
+
+#define IGC_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define IGC_TS_HDR_LEN 16
+
+#define IGC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+
+#if (PAGE_SIZE < 8192)
+#define IGC_MAX_FRAME_BUILD_SKB \
+ (SKB_WITH_OVERHEAD(IGC_RXBUFFER_2048) - IGC_SKB_PAD - IGC_TS_HDR_LEN)
+#else
+#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN)
+#endif
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IGC_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+
+/* igc_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc,
+ const u32 stat_err_bits)
+{
+ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+enum igc_state_t {
+ __IGC_TESTING,
+ __IGC_RESETTING,
+ __IGC_DOWN,
+ __IGC_PTP_TX_IN_PROGRESS,
+};
+
+enum igc_tx_flags {
+ /* cmd_type flags */
+ IGC_TX_FLAGS_VLAN = 0x01,
+ IGC_TX_FLAGS_TSO = 0x02,
+ IGC_TX_FLAGS_TSTAMP = 0x04,
+
+ /* olinfo flags */
+ IGC_TX_FLAGS_IPV4 = 0x10,
+ IGC_TX_FLAGS_CSUM = 0x20,
+};
+
+enum igc_boards {
+ board_base,
+};
+
+/* The largest size we can write to the descriptor is 65535. In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGC_MAX_TXD_PWR 15
+#define IGC_MAX_DATA_PER_TXD BIT(IGC_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct igc_tx_buffer {
+ union igc_adv_tx_desc *next_to_watch;
+ unsigned long time_stamp;
+ struct sk_buff *skb;
+ unsigned int bytecount;
+ u16 gso_segs;
+ __be16 protocol;
+
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+ u32 tx_flags;
+};
+
+struct igc_rx_buffer {
+ dma_addr_t dma;
+ struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+ __u32 page_offset;
+#else
+ __u16 page_offset;
+#endif
+ __u16 pagecnt_bias;
+};
+
+struct igc_tx_queue_stats {
+ u64 packets;
+ u64 bytes;
+ u64 restart_queue;
+ u64 restart_queue2;
+};
+
+struct igc_rx_queue_stats {
+ u64 packets;
+ u64 bytes;
+ u64 drops;
+ u64 csum_err;
+ u64 alloc_failed;
+};
+
+struct igc_rx_packet_stats {
+ u64 ipv4_packets; /* IPv4 headers processed */
+ u64 ipv4e_packets; /* IPv4E headers with extensions processed */
+ u64 ipv6_packets; /* IPv6 headers processed */
+ u64 ipv6e_packets; /* IPv6E headers with extensions processed */
+ u64 tcp_packets; /* TCP headers processed */
+ u64 udp_packets; /* UDP headers processed */
+ u64 sctp_packets; /* SCTP headers processed */
+ u64 nfs_packets; /* NFS headers processe */
+ u64 other_packets;
+};
+
+struct igc_ring_container {
+ struct igc_ring *ring; /* pointer to linked list of rings */
+ unsigned int total_bytes; /* total bytes processed this int */
+ unsigned int total_packets; /* total packets processed this int */
+ u16 work_limit; /* total work allowed per interrupt */
+ u8 count; /* total number of rings in vector */
+ u8 itr; /* current ITR setting for ring */
+};
+
+struct igc_ring {
+ struct igc_q_vector *q_vector; /* backlink to q_vector */
+ struct net_device *netdev; /* back pointer to net_device */
+ struct device *dev; /* device for dma mapping */
+ union { /* array of buffer info structs */
+ struct igc_tx_buffer *tx_buffer_info;
+ struct igc_rx_buffer *rx_buffer_info;
+ };
+ void *desc; /* descriptor ring memory */
+ unsigned long flags; /* ring specific flags */
+ void __iomem *tail; /* pointer to ring tail register */
+ dma_addr_t dma; /* phys address of the ring */
+ unsigned int size; /* length of desc. ring in bytes */
+
+ u16 count; /* number of desc. in the ring */
+ u8 queue_index; /* logical index of the ring*/
+ u8 reg_idx; /* physical index of the ring */
+
+ /* everything past this point are written often */
+ u16 next_to_clean;
+ u16 next_to_use;
+ u16 next_to_alloc;
+
+ union {
+ /* TX */
+ struct {
+ struct igc_tx_queue_stats tx_stats;
+ struct u64_stats_sync tx_syncp;
+ struct u64_stats_sync tx_syncp2;
+ };
+ /* RX */
+ struct {
+ struct igc_rx_queue_stats rx_stats;
+ struct igc_rx_packet_stats pkt_stats;
+ struct u64_stats_sync rx_syncp;
+ struct sk_buff *skb;
+ };
+ };
+} ____cacheline_internodealigned_in_smp;
+
+struct igc_q_vector {
+ struct igc_adapter *adapter; /* backlink */
+ void __iomem *itr_register;
+ u32 eims_value; /* EIMS mask value */
+
+ u16 itr_val;
+ u8 set_itr;
+
+ struct igc_ring_container rx, tx;
+
+ struct napi_struct napi;
+
+ struct rcu_head rcu; /* to avoid race with update stats on free */
+ char name[IFNAMSIZ + 9];
+ struct net_device poll_dev;
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct igc_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
+struct igc_mac_addr {
+ u8 addr[ETH_ALEN];
+ u8 queue;
+ u8 state; /* bitmask */
+};
+
+#define IGC_MAC_STATE_DEFAULT 0x1
+#define IGC_MAC_STATE_MODIFIED 0x2
+#define IGC_MAC_STATE_IN_USE 0x4
+
+/* Board specific private data structure */
+struct igc_adapter {
+ struct net_device *netdev;
+
+ unsigned long state;
+ unsigned int flags;
+ unsigned int num_q_vectors;
+
+ struct msix_entry *msix_entries;
+
+ /* TX */
+ u16 tx_work_limit;
+ u32 tx_timeout_count;
+ int num_tx_queues;
+ struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES];
+
+ /* RX */
+ int num_rx_queues;
+ struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES];
+
+ struct timer_list watchdog_timer;
+ struct timer_list dma_err_timer;
+ struct timer_list phy_info_timer;
+
+ u16 link_speed;
+ u16 link_duplex;
+
+ u8 port_num;
+
+ u8 __iomem *io_addr;
+ /* Interrupt Throttle Rate */
+ u32 rx_itr_setting;
+ u32 tx_itr_setting;
+
+ struct work_struct reset_task;
+ struct work_struct watchdog_task;
+ struct work_struct dma_err_task;
+ bool fc_autoneg;
+
+ u8 tx_timeout_factor;
+
+ int msg_enable;
+ u32 max_frame_size;
+ u32 min_frame_size;
+
+ /* OS defined structs */
+ struct pci_dev *pdev;
+ /* lock for statistics */
+ spinlock_t stats64_lock;
+ struct rtnl_link_stats64 stats64;
+
+ /* structs defined in igc_hw.h */
+ struct igc_hw hw;
+ struct igc_hw_stats stats;
+
+ struct igc_q_vector *q_vector[MAX_Q_VECTORS];
+ u32 eims_enable_mask;
+ u32 eims_other;
+
+ u16 tx_ring_count;
+ u16 rx_ring_count;
+
+ u32 *shadow_vfta;
+
+ u32 rss_queues;
+
+ /* lock for RX network flow classification filter */
+ spinlock_t nfc_lock;
+
+ struct igc_mac_addr *mac_table;
+
+ unsigned long link_check_timeout;
+ struct igc_info ei;
+};
+
+/* igc_desc_unused - calculate if we have unused descriptors */
+static inline u16 igc_desc_unused(const struct igc_ring *ring)
+{
+ u16 ntc = ring->next_to_clean;
+ u16 ntu = ring->next_to_use;
+
+ return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+static inline s32 igc_get_phy_info(struct igc_hw *hw)
+{
+ if (hw->phy.ops.get_phy_info)
+ return hw->phy.ops.get_phy_info(hw);
+
+ return 0;
+}
+
+static inline s32 igc_reset_phy(struct igc_hw *hw)
+{
+ if (hw->phy.ops.reset)
+ return hw->phy.ops.reset(hw);
+
+ return 0;
+}
+
+static inline struct netdev_queue *txring_txq(const struct igc_ring *tx_ring)
+{
+ return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+
+enum igc_ring_flags_t {
+ IGC_RING_FLAG_RX_3K_BUFFER,
+ IGC_RING_FLAG_RX_BUILD_SKB_ENABLED,
+ IGC_RING_FLAG_RX_SCTP_CSUM,
+ IGC_RING_FLAG_RX_LB_VLAN_BSWAP,
+ IGC_RING_FLAG_TX_CTX_IDX,
+ IGC_RING_FLAG_TX_DETECT_HANG
+};
+
+#define ring_uses_large_buffer(ring) \
+ test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+
+#define ring_uses_build_skb(ring) \
+ test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
+static inline unsigned int igc_rx_bufsz(struct igc_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return IGC_RXBUFFER_3072;
+
+ if (ring_uses_build_skb(ring))
+ return IGC_MAX_FRAME_BUILD_SKB + IGC_TS_HDR_LEN;
+#endif
+ return IGC_RXBUFFER_2048;
+}
+
+static inline unsigned int igc_rx_pg_order(struct igc_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return 1;
+#endif
+ return 0;
+}
+
+static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data)
+{
+ if (hw->phy.ops.read_reg)
+ return hw->phy.ops.read_reg(hw, offset, data);
+
+ return 0;
+}
+
+#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
+
+#define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
+
+#define IGC_RX_DESC(R, i) \
+ (&(((union igc_adv_rx_desc *)((R)->desc))[i]))
+#define IGC_TX_DESC(R, i) \
+ (&(((union igc_adv_tx_desc *)((R)->desc))[i]))
+#define IGC_TX_CTXTDESC(R, i) \
+ (&(((struct igc_adv_tx_context_desc *)((R)->desc))[i]))
+
+#endif /* _IGC_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
new file mode 100644
index 000000000000..832da609d9a7
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+#include "igc_i225.h"
+#include "igc_mac.h"
+#include "igc_base.h"
+#include "igc.h"
+
+/**
+ * igc_set_pcie_completion_timeout - set pci-e completion timeout
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw)
+{
+ u32 gcr = rd32(IGC_GCR);
+ u16 pcie_devctl2;
+ s32 ret_val = 0;
+
+ /* only take action if timeout value is defaulted to 0 */
+ if (gcr & IGC_GCR_CMPL_TMOUT_MASK)
+ goto out;
+
+ /* if capabilities version is type 1 we can write the
+ * timeout of 10ms to 200ms through the GCR register
+ */
+ if (!(gcr & IGC_GCR_CAP_VER2)) {
+ gcr |= IGC_GCR_CMPL_TMOUT_10ms;
+ goto out;
+ }
+
+ /* for version 2 capabilities we need to write the config space
+ * directly in order to set the completion timeout value for
+ * 16ms to 55ms
+ */
+ ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+ &pcie_devctl2);
+ if (ret_val)
+ goto out;
+
+ pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
+
+ ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+ &pcie_devctl2);
+out:
+ /* disable completion timeout resend */
+ gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND;
+
+ wr32(IGC_GCR, gcr);
+
+ return ret_val;
+}
+
+/**
+ * igc_check_for_link_base - Check for link
+ * @hw: pointer to the HW structure
+ *
+ * If sgmii is enabled, then use the pcs register to determine link, otherwise
+ * use the generic interface for determining link.
+ */
+static s32 igc_check_for_link_base(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+
+ ret_val = igc_check_for_copper_link(hw);
+
+ return ret_val;
+}
+
+/**
+ * igc_reset_hw_base - Reset hardware
+ * @hw: pointer to the HW structure
+ *
+ * This resets the hardware into a known state. This is a
+ * function pointer entry point called by the api module.
+ */
+static s32 igc_reset_hw_base(struct igc_hw *hw)
+{
+ s32 ret_val;
+ u32 ctrl;
+
+ /* Prevent the PCI-E bus from sticking if there is no TLP connection
+ * on the last TLP read/write transaction when MAC is reset.
+ */
+ ret_val = igc_disable_pcie_master(hw);
+ if (ret_val)
+ hw_dbg("PCI-E Master disable polling has failed.\n");
+
+ /* set the completion timeout for interface */
+ ret_val = igc_set_pcie_completion_timeout(hw);
+ if (ret_val)
+ hw_dbg("PCI-E Set completion timeout has failed.\n");
+
+ hw_dbg("Masking off all interrupts\n");
+ wr32(IGC_IMC, 0xffffffff);
+
+ wr32(IGC_RCTL, 0);
+ wr32(IGC_TCTL, IGC_TCTL_PSP);
+ wrfl();
+
+ usleep_range(10000, 20000);
+
+ ctrl = rd32(IGC_CTRL);
+
+ hw_dbg("Issuing a global reset to MAC\n");
+ wr32(IGC_CTRL, ctrl | IGC_CTRL_RST);
+
+ ret_val = igc_get_auto_rd_done(hw);
+ if (ret_val) {
+ /* When auto config read does not complete, do not
+ * return with an error. This can happen in situations
+ * where there is no eeprom and prevents getting link.
+ */
+ hw_dbg("Auto Read Done did not complete\n");
+ }
+
+ /* Clear any pending interrupt events. */
+ wr32(IGC_IMC, 0xffffffff);
+ rd32(IGC_ICR);
+
+ return ret_val;
+}
+
+/**
+ * igc_get_phy_id_base - Retrieve PHY addr and id
+ * @hw: pointer to the HW structure
+ *
+ * Retrieves the PHY address and ID for both PHY's which do and do not use
+ * sgmi interface.
+ */
+static s32 igc_get_phy_id_base(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+
+ ret_val = igc_get_phy_id(hw);
+
+ return ret_val;
+}
+
+/**
+ * igc_init_nvm_params_base - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_nvm_params_base(struct igc_hw *hw)
+{
+ struct igc_nvm_info *nvm = &hw->nvm;
+ u32 eecd = rd32(IGC_EECD);
+ u16 size;
+
+ size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >>
+ IGC_EECD_SIZE_EX_SHIFT);
+
+ /* Added to a constant, "size" becomes the left-shift value
+ * for setting word_size.
+ */
+ size += NVM_WORD_SIZE_BASE_SHIFT;
+
+ /* Just in case size is out of range, cap it to the largest
+ * EEPROM size supported
+ */
+ if (size > 15)
+ size = 15;
+
+ nvm->word_size = BIT(size);
+ nvm->opcode_bits = 8;
+ nvm->delay_usec = 1;
+
+ nvm->page_size = eecd & IGC_EECD_ADDR_BITS ? 32 : 8;
+ nvm->address_bits = eecd & IGC_EECD_ADDR_BITS ?
+ 16 : 8;
+
+ if (nvm->word_size == BIT(15))
+ nvm->page_size = 128;
+
+ return 0;
+}
+
+/**
+ * igc_setup_copper_link_base - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Configures the link for auto-neg or forced speed and duplex. Then we check
+ * for link, once link is established calls to configure collision distance
+ * and flow control are called.
+ */
+static s32 igc_setup_copper_link_base(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 ctrl;
+
+ ctrl = rd32(IGC_CTRL);
+ ctrl |= IGC_CTRL_SLU;
+ ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX);
+ wr32(IGC_CTRL, ctrl);
+
+ ret_val = igc_setup_copper_link(hw);
+
+ return ret_val;
+}
+
+/**
+ * igc_init_mac_params_base - Init MAC func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_mac_params_base(struct igc_hw *hw)
+{
+ struct igc_dev_spec_base *dev_spec = &hw->dev_spec._base;
+ struct igc_mac_info *mac = &hw->mac;
+
+ /* Set mta register count */
+ mac->mta_reg_count = 128;
+ mac->rar_entry_count = IGC_RAR_ENTRIES;
+
+ /* reset */
+ mac->ops.reset_hw = igc_reset_hw_base;
+
+ mac->ops.acquire_swfw_sync = igc_acquire_swfw_sync_i225;
+ mac->ops.release_swfw_sync = igc_release_swfw_sync_i225;
+
+ /* Allow a single clear of the SW semaphore on I225 */
+ if (mac->type == igc_i225)
+ dev_spec->clear_semaphore_once = true;
+
+ /* physical interface link setup */
+ mac->ops.setup_physical_interface = igc_setup_copper_link_base;
+
+ return 0;
+}
+
+/**
+ * igc_init_phy_params_base - Init PHY func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_phy_params_base(struct igc_hw *hw)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ s32 ret_val = 0;
+ u32 ctrl_ext;
+
+ if (hw->phy.media_type != igc_media_type_copper) {
+ phy->type = igc_phy_none;
+ goto out;
+ }
+
+ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT_2500;
+ phy->reset_delay_us = 100;
+
+ ctrl_ext = rd32(IGC_CTRL_EXT);
+
+ /* set lan id */
+ hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >>
+ IGC_STATUS_FUNC_SHIFT;
+
+ /* Make sure the PHY is in a good state. Several people have reported
+ * firmware leaving the PHY's page select register set to something
+ * other than the default of zero, which causes the PHY ID read to
+ * access something other than the intended register.
+ */
+ ret_val = hw->phy.ops.reset(hw);
+ if (ret_val) {
+ hw_dbg("Error resetting the PHY.\n");
+ goto out;
+ }
+
+ ret_val = igc_get_phy_id_base(hw);
+ if (ret_val)
+ return ret_val;
+
+ igc_check_for_link_base(hw);
+
+ /* Verify phy id and set remaining function pointers */
+ switch (phy->id) {
+ case I225_I_PHY_ID:
+ phy->type = igc_phy_i225;
+ break;
+ default:
+ ret_val = -IGC_ERR_PHY;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+static s32 igc_get_invariants_base(struct igc_hw *hw)
+{
+ struct igc_mac_info *mac = &hw->mac;
+ u32 link_mode = 0;
+ u32 ctrl_ext = 0;
+ s32 ret_val = 0;
+
+ switch (hw->device_id) {
+ case IGC_DEV_ID_I225_LM:
+ case IGC_DEV_ID_I225_V:
+ mac->type = igc_i225;
+ break;
+ default:
+ return -IGC_ERR_MAC_INIT;
+ }
+
+ hw->phy.media_type = igc_media_type_copper;
+
+ ctrl_ext = rd32(IGC_CTRL_EXT);
+ link_mode = ctrl_ext & IGC_CTRL_EXT_LINK_MODE_MASK;
+
+ /* mac initialization and operations */
+ ret_val = igc_init_mac_params_base(hw);
+ if (ret_val)
+ goto out;
+
+ /* NVM initialization */
+ ret_val = igc_init_nvm_params_base(hw);
+ switch (hw->mac.type) {
+ case igc_i225:
+ ret_val = igc_init_nvm_params_i225(hw);
+ break;
+ default:
+ break;
+ }
+
+ /* setup PHY parameters */
+ ret_val = igc_init_phy_params_base(hw);
+ if (ret_val)
+ goto out;
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_acquire_phy_base - Acquire rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * Acquire access rights to the correct PHY. This is a
+ * function pointer entry point called by the api module.
+ */
+static s32 igc_acquire_phy_base(struct igc_hw *hw)
+{
+ u16 mask = IGC_SWFW_PHY0_SM;
+
+ return hw->mac.ops.acquire_swfw_sync(hw, mask);
+}
+
+/**
+ * igc_release_phy_base - Release rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * A wrapper to release access rights to the correct PHY. This is a
+ * function pointer entry point called by the api module.
+ */
+static void igc_release_phy_base(struct igc_hw *hw)
+{
+ u16 mask = IGC_SWFW_PHY0_SM;
+
+ hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+/**
+ * igc_get_link_up_info_base - Get link speed/duplex info
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * This is a wrapper function, if using the serial gigabit media independent
+ * interface, use PCS to retrieve the link speed and duplex information.
+ * Otherwise, use the generic function to get the link speed and duplex info.
+ */
+static s32 igc_get_link_up_info_base(struct igc_hw *hw, u16 *speed,
+ u16 *duplex)
+{
+ s32 ret_val;
+
+ ret_val = igc_get_speed_and_duplex_copper(hw, speed, duplex);
+
+ return ret_val;
+}
+
+/**
+ * igc_init_hw_base - Initialize hardware
+ * @hw: pointer to the HW structure
+ *
+ * This inits the hardware readying it for operation.
+ */
+static s32 igc_init_hw_base(struct igc_hw *hw)
+{
+ struct igc_mac_info *mac = &hw->mac;
+ u16 i, rar_count = mac->rar_entry_count;
+ s32 ret_val = 0;
+
+ /* Setup the receive address */
+ igc_init_rx_addrs(hw, rar_count);
+
+ /* Zero out the Multicast HASH table */
+ hw_dbg("Zeroing the MTA\n");
+ for (i = 0; i < mac->mta_reg_count; i++)
+ array_wr32(IGC_MTA, i, 0);
+
+ /* Zero out the Unicast HASH table */
+ hw_dbg("Zeroing the UTA\n");
+ for (i = 0; i < mac->uta_reg_count; i++)
+ array_wr32(IGC_UTA, i, 0);
+
+ /* Setup link and flow control */
+ ret_val = igc_setup_link(hw);
+
+ /* Clear all of the statistics registers (clear on read). It is
+ * important that we do this after we have tried to establish link
+ * because the symbol error count will increment wildly if there
+ * is no link.
+ */
+ igc_clear_hw_cntrs_base(hw);
+
+ return ret_val;
+}
+
+/**
+ * igc_read_mac_addr_base - Read device MAC address
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_read_mac_addr_base(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+
+ ret_val = igc_read_mac_addr(hw);
+
+ return ret_val;
+}
+
+/**
+ * igc_power_down_phy_copper_base - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ */
+void igc_power_down_phy_copper_base(struct igc_hw *hw)
+{
+ /* If the management interface is not enabled, then power down */
+ if (!(igc_enable_mng_pass_thru(hw) || igc_check_reset_block(hw)))
+ igc_power_down_phy_copper(hw);
+}
+
+/**
+ * igc_rx_fifo_flush_base - Clean rx fifo after Rx enable
+ * @hw: pointer to the HW structure
+ *
+ * After Rx enable, if manageability is enabled then there is likely some
+ * bad data at the start of the fifo and possibly in the DMA fifo. This
+ * function clears the fifos and flushes any packets that came in as rx was
+ * being enabled.
+ */
+void igc_rx_fifo_flush_base(struct igc_hw *hw)
+{
+ u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
+ int i, ms_wait;
+
+ /* disable IPv6 options as per hardware errata */
+ rfctl = rd32(IGC_RFCTL);
+ rfctl |= IGC_RFCTL_IPV6_EX_DIS;
+ wr32(IGC_RFCTL, rfctl);
+
+ if (!(rd32(IGC_MANC) & IGC_MANC_RCV_TCO_EN))
+ return;
+
+ /* Disable all Rx queues */
+ for (i = 0; i < 4; i++) {
+ rxdctl[i] = rd32(IGC_RXDCTL(i));
+ wr32(IGC_RXDCTL(i),
+ rxdctl[i] & ~IGC_RXDCTL_QUEUE_ENABLE);
+ }
+ /* Poll all queues to verify they have shut down */
+ for (ms_wait = 0; ms_wait < 10; ms_wait++) {
+ usleep_range(1000, 2000);
+ rx_enabled = 0;
+ for (i = 0; i < 4; i++)
+ rx_enabled |= rd32(IGC_RXDCTL(i));
+ if (!(rx_enabled & IGC_RXDCTL_QUEUE_ENABLE))
+ break;
+ }
+
+ if (ms_wait == 10)
+ pr_debug("Queue disable timed out after 10ms\n");
+
+ /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
+ * incoming packets are rejected. Set enable and wait 2ms so that
+ * any packet that was coming in as RCTL.EN was set is flushed
+ */
+ wr32(IGC_RFCTL, rfctl & ~IGC_RFCTL_LEF);
+
+ rlpml = rd32(IGC_RLPML);
+ wr32(IGC_RLPML, 0);
+
+ rctl = rd32(IGC_RCTL);
+ temp_rctl = rctl & ~(IGC_RCTL_EN | IGC_RCTL_SBP);
+ temp_rctl |= IGC_RCTL_LPE;
+
+ wr32(IGC_RCTL, temp_rctl);
+ wr32(IGC_RCTL, temp_rctl | IGC_RCTL_EN);
+ wrfl();
+ usleep_range(2000, 3000);
+
+ /* Enable Rx queues that were previously enabled and restore our
+ * previous state
+ */
+ for (i = 0; i < 4; i++)
+ wr32(IGC_RXDCTL(i), rxdctl[i]);
+ wr32(IGC_RCTL, rctl);
+ wrfl();
+
+ wr32(IGC_RLPML, rlpml);
+ wr32(IGC_RFCTL, rfctl);
+
+ /* Flush receive errors generated by workaround */
+ rd32(IGC_ROC);
+ rd32(IGC_RNBC);
+ rd32(IGC_MPC);
+}
+
+static struct igc_mac_operations igc_mac_ops_base = {
+ .init_hw = igc_init_hw_base,
+ .check_for_link = igc_check_for_link_base,
+ .rar_set = igc_rar_set,
+ .read_mac_addr = igc_read_mac_addr_base,
+ .get_speed_and_duplex = igc_get_link_up_info_base,
+};
+
+static const struct igc_phy_operations igc_phy_ops_base = {
+ .acquire = igc_acquire_phy_base,
+ .release = igc_release_phy_base,
+ .reset = igc_phy_hw_reset,
+ .read_reg = igc_read_phy_reg_gpy,
+ .write_reg = igc_write_phy_reg_gpy,
+};
+
+const struct igc_info igc_base_info = {
+ .get_invariants = igc_get_invariants_base,
+ .mac_ops = &igc_mac_ops_base,
+ .phy_ops = &igc_phy_ops_base,
+};
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
new file mode 100644
index 000000000000..35588fa7b8c5
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_BASE_H
+#define _IGC_BASE_H
+
+/* forward declaration */
+void igc_rx_fifo_flush_base(struct igc_hw *hw);
+void igc_power_down_phy_copper_base(struct igc_hw *hw);
+
+/* Transmit Descriptor - Advanced */
+union igc_adv_tx_desc {
+ struct {
+ __le64 buffer_addr; /* Address of descriptor's data buf */
+ __le32 cmd_type_len;
+ __le32 olinfo_status;
+ } read;
+ struct {
+ __le64 rsvd; /* Reserved */
+ __le32 nxtseq_seed;
+ __le32 status;
+ } wb;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */
+#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
+#define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
+#define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
+#define IGC_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_ADVTXD_DCMD_RS 0x08000000 /* Report Status */
+#define IGC_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
+#define IGC_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
+#define IGC_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
+#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
+
+#define IGC_RAR_ENTRIES 16
+
+struct igc_adv_data_desc {
+ __le64 buffer_addr; /* Address of the descriptor's data buffer */
+ union {
+ u32 data;
+ struct {
+ u32 datalen:16; /* Data buffer length */
+ u32 rsvd:4;
+ u32 dtyp:4; /* Descriptor type */
+ u32 dcmd:8; /* Descriptor command */
+ } config;
+ } lower;
+ union {
+ u32 data;
+ struct {
+ u32 status:4; /* Descriptor status */
+ u32 idx:4;
+ u32 popts:6; /* Packet Options */
+ u32 paylen:18; /* Payload length */
+ } options;
+ } upper;
+};
+
+/* Receive Descriptor - Advanced */
+union igc_adv_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ } read;
+ struct {
+ struct {
+ union {
+ __le32 data;
+ struct {
+ __le16 pkt_info; /*RSS type, Pkt type*/
+ /* Split Header, header buffer len */
+ __le16 hdr_info;
+ } hs_rss;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ struct {
+ __le16 ip_id; /* IP id */
+ __le16 csum; /* Packet Checksum */
+ } csum_ip;
+ } hi_dword;
+ } lower;
+ struct {
+ __le32 status_error; /* ext status/error */
+ __le16 length; /* Packet length */
+ __le16 vlan; /* VLAN tag */
+ } upper;
+ } wb; /* writeback */
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
+
+/* Additional Transmit Descriptor Control definitions */
+#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */
+
+/* Additional Receive Descriptor Control definitions */
+#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */
+
+/* SRRCTL bit definitions */
+#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */
+#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
+#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+
+#endif /* _IGC_BASE_H */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
new file mode 100644
index 000000000000..8740754ea1fd
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_DEFINES_H_
+#define _IGC_DEFINES_H_
+
+#define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */
+
+/* PCI Bus Info */
+#define PCIE_DEVICE_CONTROL2 0x28
+#define PCIE_DEVICE_CONTROL2_16ms 0x0005
+
+/* Physical Func Reset Done Indication */
+#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define COPPER_LINK_UP_LIMIT 10
+#define PHY_AUTO_NEG_LIMIT 45
+#define PHY_FORCE_LIMIT 20
+
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define MASTER_DISABLE_TIMEOUT 800
+/*Blocks new Master requests */
+#define IGC_CTRL_GIO_MASTER_DISABLE 0x00000004
+/* Status of Master requests. */
+#define IGC_STATUS_GIO_MASTER_ENABLE 0x00080000
+
+/* PCI Express Control */
+#define IGC_GCR_CMPL_TMOUT_MASK 0x0000F000
+#define IGC_GCR_CMPL_TMOUT_10ms 0x00001000
+#define IGC_GCR_CMPL_TMOUT_RESEND 0x00010000
+#define IGC_GCR_CAP_VER2 0x00040000
+
+/* Receive Address
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots. However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define IGC_RAH_AV 0x80000000 /* Receive descriptor valid */
+#define IGC_RAH_POOL_1 0x00040000
+#define IGC_RAL_MAC_ADDR_LEN 4
+#define IGC_RAH_MAC_ADDR_LEN 2
+
+/* Error Codes */
+#define IGC_SUCCESS 0
+#define IGC_ERR_NVM 1
+#define IGC_ERR_PHY 2
+#define IGC_ERR_CONFIG 3
+#define IGC_ERR_PARAM 4
+#define IGC_ERR_MAC_INIT 5
+#define IGC_ERR_RESET 9
+#define IGC_ERR_MASTER_REQUESTS_PENDING 10
+#define IGC_ERR_BLK_PHY_RESET 12
+#define IGC_ERR_SWFW_SYNC 13
+
+/* Device Control */
+#define IGC_CTRL_RST 0x04000000 /* Global reset */
+
+#define IGC_CTRL_PHY_RST 0x80000000 /* PHY Reset */
+#define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */
+#define IGC_CTRL_FRCSPD 0x00000800 /* Force Speed */
+#define IGC_CTRL_FRCDPX 0x00001000 /* Force Duplex */
+
+#define IGC_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */
+#define IGC_CTRL_TFCE 0x10000000 /* Transmit flow control enable */
+
+#define IGC_CONNSW_AUTOSENSE_EN 0x1
+
+/* PBA constants */
+#define IGC_PBA_34K 0x0022
+
+/* SW Semaphore Register */
+#define IGC_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
+#define IGC_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
+
+/* SWFW_SYNC Definitions */
+#define IGC_SWFW_EEP_SM 0x1
+#define IGC_SWFW_PHY0_SM 0x2
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */
+#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */
+#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */
+
+/* PHY GPY 211 registers */
+#define STANDARD_AN_REG_MASK 0x0007 /* MMD */
+#define ANEG_MULTIGBT_AN_CTRL 0x0020 /* MULTI GBT AN Control Register */
+#define MMD_DEVADDR_SHIFT 16 /* Shift MMD to higher bits */
+#define CR_2500T_FD_CAPS 0x0080 /* Advertise 2500T FD capability */
+
+/* NVM Control */
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT 10
+#define IGC_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */
+#define IGC_EECD_REQ 0x00000040 /* NVM Access Request */
+#define IGC_EECD_GNT 0x00000080 /* NVM Access Grant */
+/* NVM Addressing bits based on type 0=small, 1=large */
+#define IGC_EECD_ADDR_BITS 0x00000400
+#define IGC_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */
+#define IGC_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */
+#define IGC_EECD_SIZE_EX_SHIFT 11
+#define IGC_EECD_FLUPD_I225 0x00800000 /* Update FLASH */
+#define IGC_EECD_FLUDONE_I225 0x04000000 /* Update FLASH done*/
+#define IGC_EECD_FLASH_DETECTED_I225 0x00080000 /* FLASH detected */
+#define IGC_FLUDONE_ATTEMPTS 20000
+#define IGC_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */
+
+/* Offset to data in NVM read/write registers */
+#define IGC_NVM_RW_REG_DATA 16
+#define IGC_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */
+#define IGC_NVM_RW_REG_START 1 /* Start operation */
+#define IGC_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */
+#define IGC_NVM_POLL_READ 0 /* Flag for polling for read complete */
+
+/* NVM Word Offsets */
+#define NVM_CHECKSUM_REG 0x003F
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM 0xBABA
+
+#define NVM_PBA_OFFSET_0 8
+#define NVM_PBA_OFFSET_1 9
+#define NVM_RESERVED_WORD 0xFFFF
+#define NVM_PBA_PTR_GUARD 0xFAFA
+#define NVM_WORD_SIZE_BASE_SHIFT 6
+
+/* Collision related configuration parameters */
+#define IGC_COLLISION_THRESHOLD 15
+#define IGC_CT_SHIFT 4
+#define IGC_COLLISION_DISTANCE 63
+#define IGC_COLD_SHIFT 12
+
+/* Device Status */
+#define IGC_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */
+#define IGC_STATUS_LU 0x00000002 /* Link up.0=no,1=link */
+#define IGC_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */
+#define IGC_STATUS_FUNC_SHIFT 2
+#define IGC_STATUS_FUNC_1 0x00000004 /* Function 1 */
+#define IGC_STATUS_TXOFF 0x00000010 /* transmission paused */
+#define IGC_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
+#define IGC_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
+#define IGC_STATUS_SPEED_2500 0x00400000 /* Speed 2.5Gb/s */
+
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+#define SPEED_2500 2500
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+/* 1Gbps and 2.5Gbps half duplex is not supported, nor spec-compliant. */
+#define ADVERTISE_10_HALF 0x0001
+#define ADVERTISE_10_FULL 0x0002
+#define ADVERTISE_100_HALF 0x0004
+#define ADVERTISE_100_FULL 0x0008
+#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL 0x0020
+#define ADVERTISE_2500_HALF 0x0040 /* Not used, just FYI */
+#define ADVERTISE_2500_FULL 0x0080
+
+#define IGC_ALL_SPEED_DUPLEX_2500 ( \
+ ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+ ADVERTISE_100_FULL | ADVERTISE_1000_FULL | ADVERTISE_2500_FULL)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT_2500 IGC_ALL_SPEED_DUPLEX_2500
+
+/* Interrupt Cause Read */
+#define IGC_ICR_TXDW BIT(0) /* Transmit desc written back */
+#define IGC_ICR_TXQE BIT(1) /* Transmit Queue empty */
+#define IGC_ICR_LSC BIT(2) /* Link Status Change */
+#define IGC_ICR_RXSEQ BIT(3) /* Rx sequence error */
+#define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. threshold (0) */
+#define IGC_ICR_RXO BIT(6) /* Rx overrun */
+#define IGC_ICR_RXT0 BIT(7) /* Rx timer intr (ring 0) */
+#define IGC_ICR_DRSTA BIT(30) /* Device Reset Asserted */
+
+/* If this bit asserted, the driver should claim the interrupt */
+#define IGC_ICR_INT_ASSERTED BIT(31)
+
+#define IGC_ICS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */
+
+#define IMS_ENABLE_MASK ( \
+ IGC_IMS_RXT0 | \
+ IGC_IMS_TXDW | \
+ IGC_IMS_RXDMT0 | \
+ IGC_IMS_RXSEQ | \
+ IGC_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define IGC_IMS_TXDW IGC_ICR_TXDW /* Tx desc written back */
+#define IGC_IMS_RXSEQ IGC_ICR_RXSEQ /* Rx sequence error */
+#define IGC_IMS_LSC IGC_ICR_LSC /* Link Status Change */
+#define IGC_IMS_DOUTSYNC IGC_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define IGC_IMS_DRSTA IGC_ICR_DRSTA /* Device Reset Asserted */
+#define IGC_IMS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */
+#define IGC_IMS_RXDMT0 IGC_ICR_RXDMT0 /* Rx desc min. threshold */
+
+#define IGC_QVECTOR_MASK 0x7FFC /* Q-vector mask */
+#define IGC_ITR_VAL_MASK 0x04 /* ITR value mask */
+
+/* Interrupt Cause Set */
+#define IGC_ICS_LSC IGC_ICR_LSC /* Link Status Change */
+#define IGC_ICS_RXDMT0 IGC_ICR_RXDMT0 /* rx desc min. threshold */
+#define IGC_ICS_DRSTA IGC_ICR_DRSTA /* Device Reset Aserted */
+
+#define IGC_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */
+#define IGC_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */
+#define IGC_IVAR_VALID 0x80
+#define IGC_GPIE_NSICR 0x00000001
+#define IGC_GPIE_MSIX_MODE 0x00000010
+#define IGC_GPIE_EIAME 0x40000000
+#define IGC_GPIE_PBA 0x80000000
+
+/* Transmit Descriptor bit definitions */
+#define IGC_TXD_DTYP_D 0x00100000 /* Data Descriptor */
+#define IGC_TXD_DTYP_C 0x00000000 /* Context Descriptor */
+#define IGC_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
+#define IGC_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
+#define IGC_TXD_CMD_EOP 0x01000000 /* End of Packet */
+#define IGC_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_TXD_CMD_IC 0x04000000 /* Insert Checksum */
+#define IGC_TXD_CMD_RS 0x08000000 /* Report Status */
+#define IGC_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */
+#define IGC_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
+#define IGC_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
+#define IGC_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */
+#define IGC_TXD_STAT_DD 0x00000001 /* Descriptor Done */
+#define IGC_TXD_STAT_EC 0x00000002 /* Excess Collisions */
+#define IGC_TXD_STAT_LC 0x00000004 /* Late Collisions */
+#define IGC_TXD_STAT_TU 0x00000008 /* Transmit underrun */
+#define IGC_TXD_CMD_TCP 0x01000000 /* TCP packet */
+#define IGC_TXD_CMD_IP 0x02000000 /* IP packet */
+#define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */
+#define IGC_TXD_STAT_TC 0x00000004 /* Tx Underrun */
+#define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define IGC_TCTL_EN 0x00000002 /* enable Tx */
+#define IGC_TCTL_PSP 0x00000008 /* pad short packets */
+#define IGC_TCTL_CT 0x00000ff0 /* collision threshold */
+#define IGC_TCTL_COLD 0x003ff000 /* collision distance */
+#define IGC_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
+#define IGC_TCTL_MULR 0x10000000 /* Multiple request support */
+
+#define IGC_CT_SHIFT 4
+#define IGC_COLLISION_THRESHOLD 15
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE 0x8808
+/* Enable XON frame transmission */
+#define IGC_FCRTL_XONE 0x80000000
+
+/* Management Control */
+#define IGC_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */
+#define IGC_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */
+
+/* Receive Control */
+#define IGC_RCTL_RST 0x00000001 /* Software reset */
+#define IGC_RCTL_EN 0x00000002 /* enable */
+#define IGC_RCTL_SBP 0x00000004 /* store bad packet */
+#define IGC_RCTL_UPE 0x00000008 /* unicast promisc enable */
+#define IGC_RCTL_MPE 0x00000010 /* multicast promisc enable */
+#define IGC_RCTL_LPE 0x00000020 /* long packet enable */
+#define IGC_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */
+#define IGC_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */
+
+#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */
+#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */
+
+/* Receive Descriptor bit definitions */
+#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */
+
+#define IGC_RXDEXT_STATERR_CE 0x01000000
+#define IGC_RXDEXT_STATERR_SE 0x02000000
+#define IGC_RXDEXT_STATERR_SEQ 0x04000000
+#define IGC_RXDEXT_STATERR_CXE 0x10000000
+#define IGC_RXDEXT_STATERR_TCPE 0x20000000
+#define IGC_RXDEXT_STATERR_IPE 0x40000000
+#define IGC_RXDEXT_STATERR_RXE 0x80000000
+
+/* Same mask, but for extended and packet split descriptors */
+#define IGC_RXDEXT_ERR_FRAME_ERR_MASK ( \
+ IGC_RXDEXT_STATERR_CE | \
+ IGC_RXDEXT_STATERR_SE | \
+ IGC_RXDEXT_STATERR_SEQ | \
+ IGC_RXDEXT_STATERR_CXE | \
+ IGC_RXDEXT_STATERR_RXE)
+
+/* Header split receive */
+#define IGC_RFCTL_IPV6_EX_DIS 0x00010000
+#define IGC_RFCTL_LEF 0x00040000
+
+#define IGC_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */
+
+#define IGC_RCTL_MO_SHIFT 12 /* multicast offset shift */
+#define IGC_RCTL_CFIEN 0x00080000 /* canonical form enable */
+#define IGC_RCTL_DPF 0x00400000 /* discard pause frames */
+#define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */
+#define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */
+
+#define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */
+#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */
+
+/* GPY211 - I225 defines */
+#define GPY_MMD_MASK 0xFFFF0000
+#define GPY_MMD_SHIFT 16
+#define GPY_REG_MASK 0x0000FFFF
+
+#define IGC_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */
+
+/* MAC definitions */
+#define IGC_FACTPS_MNGCG 0x20000000
+#define IGC_FWSM_MODE_MASK 0xE
+#define IGC_FWSM_MODE_SHIFT 1
+
+/* Management Control */
+#define IGC_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */
+#define IGC_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */
+
+/* PHY */
+#define PHY_REVISION_MASK 0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */
+#define IGC_GEN_POLL_TIMEOUT 1920
+
+/* PHY Control Register */
+#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */
+#define MII_CR_POWER_DOWN 0x0800 /* Power down */
+#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */
+#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */
+#define MII_CR_SPEED_1000 0x0040
+#define MII_CR_SPEED_100 0x2000
+#define MII_CR_SPEED_10 0x0000
+
+/* PHY Status Register */
+#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CONTROL 0x00 /* Control Register */
+#define PHY_STATUS 0x01 /* Status Register */
+#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */
+#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
+
+/* Bit definitions for valid PHY IDs. I = Integrated E = External */
+#define I225_I_PHY_ID 0x67C9DC00
+
+/* MDI Control */
+#define IGC_MDIC_DATA_MASK 0x0000FFFF
+#define IGC_MDIC_REG_MASK 0x001F0000
+#define IGC_MDIC_REG_SHIFT 16
+#define IGC_MDIC_PHY_MASK 0x03E00000
+#define IGC_MDIC_PHY_SHIFT 21
+#define IGC_MDIC_OP_WRITE 0x04000000
+#define IGC_MDIC_OP_READ 0x08000000
+#define IGC_MDIC_READY 0x10000000
+#define IGC_MDIC_INT_EN 0x20000000
+#define IGC_MDIC_ERROR 0x40000000
+#define IGC_MDIC_DEST 0x80000000
+
+#define IGC_N0_QUEUE -1
+
+#endif /* _IGC_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
new file mode 100644
index 000000000000..c50414f48f0d
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_HW_H_
+#define _IGC_HW_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+
+#include "igc_regs.h"
+#include "igc_defines.h"
+#include "igc_mac.h"
+#include "igc_phy.h"
+#include "igc_nvm.h"
+#include "igc_i225.h"
+#include "igc_base.h"
+
+#define IGC_DEV_ID_I225_LM 0x15F2
+#define IGC_DEV_ID_I225_V 0x15F3
+
+#define IGC_FUNC_0 0
+
+/* Function pointers for the MAC. */
+struct igc_mac_operations {
+ s32 (*check_for_link)(struct igc_hw *hw);
+ s32 (*reset_hw)(struct igc_hw *hw);
+ s32 (*init_hw)(struct igc_hw *hw);
+ s32 (*setup_physical_interface)(struct igc_hw *hw);
+ void (*rar_set)(struct igc_hw *hw, u8 *address, u32 index);
+ s32 (*read_mac_addr)(struct igc_hw *hw);
+ s32 (*get_speed_and_duplex)(struct igc_hw *hw, u16 *speed,
+ u16 *duplex);
+ s32 (*acquire_swfw_sync)(struct igc_hw *hw, u16 mask);
+ void (*release_swfw_sync)(struct igc_hw *hw, u16 mask);
+};
+
+enum igc_mac_type {
+ igc_undefined = 0,
+ igc_i225,
+ igc_num_macs /* List is 1-based, so subtract 1 for true count. */
+};
+
+enum igc_phy_type {
+ igc_phy_unknown = 0,
+ igc_phy_none,
+ igc_phy_i225,
+};
+
+enum igc_media_type {
+ igc_media_type_unknown = 0,
+ igc_media_type_copper = 1,
+ igc_num_media_types
+};
+
+enum igc_nvm_type {
+ igc_nvm_unknown = 0,
+ igc_nvm_flash_hw,
+ igc_nvm_invm,
+};
+
+struct igc_info {
+ s32 (*get_invariants)(struct igc_hw *hw);
+ struct igc_mac_operations *mac_ops;
+ const struct igc_phy_operations *phy_ops;
+ struct igc_nvm_operations *nvm_ops;
+};
+
+extern const struct igc_info igc_base_info;
+
+struct igc_mac_info {
+ struct igc_mac_operations ops;
+
+ u8 addr[ETH_ALEN];
+ u8 perm_addr[ETH_ALEN];
+
+ enum igc_mac_type type;
+
+ u32 collision_delta;
+ u32 ledctl_default;
+ u32 ledctl_mode1;
+ u32 ledctl_mode2;
+ u32 mc_filter_type;
+ u32 tx_packet_delta;
+ u32 txcw;
+
+ u16 mta_reg_count;
+ u16 uta_reg_count;
+
+ u16 rar_entry_count;
+
+ u8 forced_speed_duplex;
+
+ bool adaptive_ifs;
+ bool has_fwsm;
+ bool asf_firmware_present;
+ bool arc_subsystem_valid;
+
+ bool autoneg;
+ bool autoneg_failed;
+ bool get_link_status;
+};
+
+struct igc_nvm_operations {
+ s32 (*acquire)(struct igc_hw *hw);
+ s32 (*read)(struct igc_hw *hw, u16 offset, u16 i, u16 *data);
+ void (*release)(struct igc_hw *hw);
+ s32 (*write)(struct igc_hw *hw, u16 offset, u16 i, u16 *data);
+ s32 (*update)(struct igc_hw *hw);
+ s32 (*validate)(struct igc_hw *hw);
+ s32 (*valid_led_default)(struct igc_hw *hw, u16 *data);
+};
+
+struct igc_phy_operations {
+ s32 (*acquire)(struct igc_hw *hw);
+ s32 (*check_polarity)(struct igc_hw *hw);
+ s32 (*check_reset_block)(struct igc_hw *hw);
+ s32 (*force_speed_duplex)(struct igc_hw *hw);
+ s32 (*get_cfg_done)(struct igc_hw *hw);
+ s32 (*get_cable_length)(struct igc_hw *hw);
+ s32 (*get_phy_info)(struct igc_hw *hw);
+ s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data);
+ void (*release)(struct igc_hw *hw);
+ s32 (*reset)(struct igc_hw *hw);
+ s32 (*write_reg)(struct igc_hw *hw, u32 address, u16 data);
+};
+
+struct igc_nvm_info {
+ struct igc_nvm_operations ops;
+ enum igc_nvm_type type;
+
+ u32 flash_bank_size;
+ u32 flash_base_addr;
+
+ u16 word_size;
+ u16 delay_usec;
+ u16 address_bits;
+ u16 opcode_bits;
+ u16 page_size;
+};
+
+struct igc_phy_info {
+ struct igc_phy_operations ops;
+
+ enum igc_phy_type type;
+
+ u32 addr;
+ u32 id;
+ u32 reset_delay_us; /* in usec */
+ u32 revision;
+
+ enum igc_media_type media_type;
+
+ u16 autoneg_advertised;
+ u16 autoneg_mask;
+ u16 cable_length;
+ u16 max_cable_length;
+ u16 min_cable_length;
+ u16 pair_length[4];
+
+ u8 mdix;
+
+ bool disable_polarity_correction;
+ bool is_mdix;
+ bool polarity_correction;
+ bool reset_disable;
+ bool speed_downgraded;
+ bool autoneg_wait_to_complete;
+};
+
+struct igc_bus_info {
+ u16 func;
+ u16 pci_cmd_word;
+};
+
+enum igc_fc_mode {
+ igc_fc_none = 0,
+ igc_fc_rx_pause,
+ igc_fc_tx_pause,
+ igc_fc_full,
+ igc_fc_default = 0xFF
+};
+
+struct igc_fc_info {
+ u32 high_water; /* Flow control high-water mark */
+ u32 low_water; /* Flow control low-water mark */
+ u16 pause_time; /* Flow control pause timer */
+ bool send_xon; /* Flow control send XON */
+ bool strict_ieee; /* Strict IEEE mode */
+ enum igc_fc_mode current_mode; /* Type of flow control */
+ enum igc_fc_mode requested_mode;
+};
+
+struct igc_dev_spec_base {
+ bool global_device_reset;
+ bool eee_disable;
+ bool clear_semaphore_once;
+ bool module_plugged;
+ u8 media_port;
+ bool mas_capable;
+};
+
+struct igc_hw {
+ void *back;
+
+ u8 __iomem *hw_addr;
+ unsigned long io_base;
+
+ struct igc_mac_info mac;
+ struct igc_fc_info fc;
+ struct igc_nvm_info nvm;
+ struct igc_phy_info phy;
+
+ struct igc_bus_info bus;
+
+ union {
+ struct igc_dev_spec_base _base;
+ } dev_spec;
+
+ u16 device_id;
+ u16 subsystem_vendor_id;
+ u16 subsystem_device_id;
+ u16 vendor_id;
+
+ u8 revision_id;
+};
+
+/* Statistics counters collected by the MAC */
+struct igc_hw_stats {
+ u64 crcerrs;
+ u64 algnerrc;
+ u64 symerrs;
+ u64 rxerrc;
+ u64 mpc;
+ u64 scc;
+ u64 ecol;
+ u64 mcc;
+ u64 latecol;
+ u64 colc;
+ u64 dc;
+ u64 tncrs;
+ u64 sec;
+ u64 cexterr;
+ u64 rlec;
+ u64 xonrxc;
+ u64 xontxc;
+ u64 xoffrxc;
+ u64 xofftxc;
+ u64 fcruc;
+ u64 prc64;
+ u64 prc127;
+ u64 prc255;
+ u64 prc511;
+ u64 prc1023;
+ u64 prc1522;
+ u64 gprc;
+ u64 bprc;
+ u64 mprc;
+ u64 gptc;
+ u64 gorc;
+ u64 gotc;
+ u64 rnbc;
+ u64 ruc;
+ u64 rfc;
+ u64 roc;
+ u64 rjc;
+ u64 mgprc;
+ u64 mgpdc;
+ u64 mgptc;
+ u64 tor;
+ u64 tot;
+ u64 tpr;
+ u64 tpt;
+ u64 ptc64;
+ u64 ptc127;
+ u64 ptc255;
+ u64 ptc511;
+ u64 ptc1023;
+ u64 ptc1522;
+ u64 mptc;
+ u64 bptc;
+ u64 tsctc;
+ u64 tsctfc;
+ u64 iac;
+ u64 icrxptc;
+ u64 icrxatc;
+ u64 ictxptc;
+ u64 ictxatc;
+ u64 ictxqec;
+ u64 ictxqmtc;
+ u64 icrxdmtc;
+ u64 icrxoc;
+ u64 cbtmpc;
+ u64 htdpmc;
+ u64 cbrdpc;
+ u64 cbrmpc;
+ u64 rpthc;
+ u64 hgptc;
+ u64 htcbdpc;
+ u64 hgorc;
+ u64 hgotc;
+ u64 lenerrs;
+ u64 scvpc;
+ u64 hrmpc;
+ u64 doosync;
+ u64 o2bgptc;
+ u64 o2bspc;
+ u64 b2ospc;
+ u64 b2ogprc;
+};
+
+struct net_device *igc_get_hw_dev(struct igc_hw *hw);
+#define hw_dbg(format, arg...) \
+ netdev_dbg(igc_get_hw_dev(hw), format, ##arg)
+
+s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+
+#endif /* _IGC_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
new file mode 100644
index 000000000000..c25f555aaf82
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+
+/**
+ * igc_get_hw_semaphore_i225 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+static s32 igc_acquire_nvm_i225(struct igc_hw *hw)
+{
+ return igc_acquire_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_release_nvm_i225 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
+ */
+static void igc_release_nvm_i225(struct igc_hw *hw)
+{
+ igc_release_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_get_hw_semaphore_i225 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ */
+static s32 igc_get_hw_semaphore_i225(struct igc_hw *hw)
+{
+ s32 timeout = hw->nvm.word_size + 1;
+ s32 i = 0;
+ u32 swsm;
+
+ /* Get the SW semaphore */
+ while (i < timeout) {
+ swsm = rd32(IGC_SWSM);
+ if (!(swsm & IGC_SWSM_SMBI))
+ break;
+
+ usleep_range(500, 600);
+ i++;
+ }
+
+ if (i == timeout) {
+ /* In rare circumstances, the SW semaphore may already be held
+ * unintentionally. Clear the semaphore once before giving up.
+ */
+ if (hw->dev_spec._base.clear_semaphore_once) {
+ hw->dev_spec._base.clear_semaphore_once = false;
+ igc_put_hw_semaphore(hw);
+ for (i = 0; i < timeout; i++) {
+ swsm = rd32(IGC_SWSM);
+ if (!(swsm & IGC_SWSM_SMBI))
+ break;
+
+ usleep_range(500, 600);
+ }
+ }
+
+ /* If we do not have the semaphore here, we have to give up. */
+ if (i == timeout) {
+ hw_dbg("Driver can't access device - SMBI bit is set.\n");
+ return -IGC_ERR_NVM;
+ }
+ }
+
+ /* Get the FW semaphore. */
+ for (i = 0; i < timeout; i++) {
+ swsm = rd32(IGC_SWSM);
+ wr32(IGC_SWSM, swsm | IGC_SWSM_SWESMBI);
+
+ /* Semaphore acquired if bit latched */
+ if (rd32(IGC_SWSM) & IGC_SWSM_SWESMBI)
+ break;
+
+ usleep_range(500, 600);
+ }
+
+ if (i == timeout) {
+ /* Release semaphores */
+ igc_put_hw_semaphore(hw);
+ hw_dbg("Driver can't access the NVM\n");
+ return -IGC_ERR_NVM;
+ }
+
+ return 0;
+}
+
+/**
+ * igc_acquire_swfw_sync_i225 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
+ * will also specify which port we're acquiring the lock for.
+ */
+s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+{
+ s32 i = 0, timeout = 200;
+ u32 fwmask = mask << 16;
+ u32 swmask = mask;
+ s32 ret_val = 0;
+ u32 swfw_sync;
+
+ while (i < timeout) {
+ if (igc_get_hw_semaphore_i225(hw)) {
+ ret_val = -IGC_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync = rd32(IGC_SW_FW_SYNC);
+ if (!(swfw_sync & (fwmask | swmask)))
+ break;
+
+ /* Firmware currently using resource (fwmask) */
+ igc_put_hw_semaphore(hw);
+ mdelay(5);
+ i++;
+ }
+
+ if (i == timeout) {
+ hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n");
+ ret_val = -IGC_ERR_SWFW_SYNC;
+ goto out;
+ }
+
+ swfw_sync |= swmask;
+ wr32(IGC_SW_FW_SYNC, swfw_sync);
+
+ igc_put_hw_semaphore(hw);
+out:
+ return ret_val;
+}
+
+/**
+ * igc_release_swfw_sync_i225 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM. The mask
+ * will also specify which port we're releasing the lock for.
+ */
+void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+{
+ u32 swfw_sync;
+
+ while (igc_get_hw_semaphore_i225(hw))
+ ; /* Empty */
+
+ swfw_sync = rd32(IGC_SW_FW_SYNC);
+ swfw_sync &= ~mask;
+ wr32(IGC_SW_FW_SYNC, swfw_sync);
+
+ igc_put_hw_semaphore(hw);
+}
+
+/**
+ * igc_read_nvm_srrd_i225 - Reads Shadow Ram using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the Shadow Ram to read
+ * @words: number of words to read
+ * @data: word read from the Shadow Ram
+ *
+ * Reads a 16 bit word from the Shadow Ram using the EERD register.
+ * Uses necessary synchronization semaphores.
+ */
+static s32 igc_read_nvm_srrd_i225(struct igc_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ s32 status = 0;
+ u16 i, count;
+
+ /* We cannot hold synchronization semaphores for too long,
+ * because of forceful takeover procedure. However it is more efficient
+ * to read in bursts than synchronizing access for each word.
+ */
+ for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+ count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+ IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+ status = hw->nvm.ops.acquire(hw);
+ if (status)
+ break;
+
+ status = igc_read_nvm_eerd(hw, offset, count, data + i);
+ hw->nvm.ops.release(hw);
+ if (status)
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * igc_write_nvm_srwr - Write to Shadow Ram using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow Ram to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ * Writes data to Shadow Ram at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function , the
+ * Shadow Ram will most likely contain an invalid checksum.
+ */
+static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ struct igc_nvm_info *nvm = &hw->nvm;
+ u32 attempts = 100000;
+ u32 i, k, eewr = 0;
+ s32 ret_val = 0;
+
+ /* A check for invalid values: offset too large, too many words,
+ * too many words for the offset, and not enough words.
+ */
+ if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+ words == 0) {
+ hw_dbg("nvm parameter(s) out of bounds\n");
+ ret_val = -IGC_ERR_NVM;
+ goto out;
+ }
+
+ for (i = 0; i < words; i++) {
+ eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) |
+ (data[i] << IGC_NVM_RW_REG_DATA) |
+ IGC_NVM_RW_REG_START;
+
+ wr32(IGC_SRWR, eewr);
+
+ for (k = 0; k < attempts; k++) {
+ if (IGC_NVM_RW_REG_DONE &
+ rd32(IGC_SRWR)) {
+ ret_val = 0;
+ break;
+ }
+ udelay(5);
+ }
+
+ if (ret_val) {
+ hw_dbg("Shadow RAM write EEWR timed out\n");
+ break;
+ }
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_write_nvm_srwr_i225 - Write to Shadow RAM using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow RAM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ * Writes data to Shadow RAM at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function , the
+ * data will not be committed to FLASH and also Shadow RAM will most likely
+ * contain an invalid checksum.
+ *
+ * If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ * partially written.
+ */
+static s32 igc_write_nvm_srwr_i225(struct igc_hw *hw, u16 offset, u16 words,
+ u16 *data)
+{
+ s32 status = 0;
+ u16 i, count;
+
+ /* We cannot hold synchronization semaphores for too long,
+ * because of forceful takeover procedure. However it is more efficient
+ * to write in bursts than synchronizing access for each word.
+ */
+ for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+ count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+ IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+ status = hw->nvm.ops.acquire(hw);
+ if (status)
+ break;
+
+ status = igc_write_nvm_srwr(hw, offset, count, data + i);
+ hw->nvm.ops.release(hw);
+ if (status)
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * igc_validate_nvm_checksum_i225 - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ */
+static s32 igc_validate_nvm_checksum_i225(struct igc_hw *hw)
+{
+ s32 (*read_op_ptr)(struct igc_hw *hw, u16 offset, u16 count,
+ u16 *data);
+ s32 status = 0;
+
+ status = hw->nvm.ops.acquire(hw);
+ if (status)
+ goto out;
+
+ /* Replace the read function with semaphore grabbing with
+ * the one that skips this for a while.
+ * We have semaphore taken already here.
+ */
+ read_op_ptr = hw->nvm.ops.read;
+ hw->nvm.ops.read = igc_read_nvm_eerd;
+
+ status = igc_validate_nvm_checksum(hw);
+
+ /* Revert original read operation. */
+ hw->nvm.ops.read = read_op_ptr;
+
+ hw->nvm.ops.release(hw);
+
+out:
+ return status;
+}
+
+/**
+ * igc_pool_flash_update_done_i225 - Pool FLUDONE status
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_pool_flash_update_done_i225(struct igc_hw *hw)
+{
+ s32 ret_val = -IGC_ERR_NVM;
+ u32 i, reg;
+
+ for (i = 0; i < IGC_FLUDONE_ATTEMPTS; i++) {
+ reg = rd32(IGC_EECD);
+ if (reg & IGC_EECD_FLUDONE_I225) {
+ ret_val = 0;
+ break;
+ }
+ udelay(5);
+ }
+
+ return ret_val;
+}
+
+/**
+ * igc_update_flash_i225 - Commit EEPROM to the flash
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_update_flash_i225(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 flup;
+
+ ret_val = igc_pool_flash_update_done_i225(hw);
+ if (ret_val == -IGC_ERR_NVM) {
+ hw_dbg("Flash update time out\n");
+ goto out;
+ }
+
+ flup = rd32(IGC_EECD) | IGC_EECD_FLUPD_I225;
+ wr32(IGC_EECD, flup);
+
+ ret_val = igc_pool_flash_update_done_i225(hw);
+ if (ret_val)
+ hw_dbg("Flash update time out\n");
+ else
+ hw_dbg("Flash update complete\n");
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_update_nvm_checksum_i225 - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum. Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM. Next commit EEPROM data onto the Flash.
+ */
+static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw)
+{
+ u16 checksum = 0;
+ s32 ret_val = 0;
+ u16 i, nvm_data;
+
+ /* Read the first word from the EEPROM. If this times out or fails, do
+ * not continue or we could be in for a very long wait while every
+ * EEPROM read fails
+ */
+ ret_val = igc_read_nvm_eerd(hw, 0, 1, &nvm_data);
+ if (ret_val) {
+ hw_dbg("EEPROM read failed\n");
+ goto out;
+ }
+
+ ret_val = hw->nvm.ops.acquire(hw);
+ if (ret_val)
+ goto out;
+
+ /* Do not use hw->nvm.ops.write, hw->nvm.ops.read
+ * because we do not want to take the synchronization
+ * semaphores twice here.
+ */
+
+ for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+ ret_val = igc_read_nvm_eerd(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ hw->nvm.ops.release(hw);
+ hw_dbg("NVM Read Error while updating checksum.\n");
+ goto out;
+ }
+ checksum += nvm_data;
+ }
+ checksum = (u16)NVM_SUM - checksum;
+ ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
+ &checksum);
+ if (ret_val) {
+ hw->nvm.ops.release(hw);
+ hw_dbg("NVM Write Error while updating checksum.\n");
+ goto out;
+ }
+
+ hw->nvm.ops.release(hw);
+
+ ret_val = igc_update_flash_i225(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_get_flash_presence_i225 - Check if flash device is detected
+ * @hw: pointer to the HW structure
+ */
+bool igc_get_flash_presence_i225(struct igc_hw *hw)
+{
+ bool ret_val = false;
+ u32 eec = 0;
+
+ eec = rd32(IGC_EECD);
+ if (eec & IGC_EECD_FLASH_DETECTED_I225)
+ ret_val = true;
+
+ return ret_val;
+}
+
+/**
+ * igc_init_nvm_params_i225 - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ */
+s32 igc_init_nvm_params_i225(struct igc_hw *hw)
+{
+ struct igc_nvm_info *nvm = &hw->nvm;
+
+ nvm->ops.acquire = igc_acquire_nvm_i225;
+ nvm->ops.release = igc_release_nvm_i225;
+
+ /* NVM Function Pointers */
+ if (igc_get_flash_presence_i225(hw)) {
+ hw->nvm.type = igc_nvm_flash_hw;
+ nvm->ops.read = igc_read_nvm_srrd_i225;
+ nvm->ops.write = igc_write_nvm_srwr_i225;
+ nvm->ops.validate = igc_validate_nvm_checksum_i225;
+ nvm->ops.update = igc_update_nvm_checksum_i225;
+ } else {
+ hw->nvm.type = igc_nvm_invm;
+ nvm->ops.read = igc_read_nvm_eerd;
+ nvm->ops.write = NULL;
+ nvm->ops.validate = NULL;
+ nvm->ops.update = NULL;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.h b/drivers/net/ethernet/intel/igc/igc_i225.h
new file mode 100644
index 000000000000..7b66e1f9c0e6
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_I225_H_
+#define _IGC_I225_H_
+
+s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask);
+void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask);
+
+s32 igc_init_nvm_params_i225(struct igc_hw *hw);
+bool igc_get_flash_presence_i225(struct igc_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
new file mode 100644
index 000000000000..f7683d3ae47c
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -0,0 +1,806 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "igc_mac.h"
+#include "igc_hw.h"
+
+/* forward declaration */
+static s32 igc_set_default_fc(struct igc_hw *hw);
+static s32 igc_set_fc_watermarks(struct igc_hw *hw);
+
+/**
+ * igc_disable_pcie_master - Disables PCI-express master access
+ * @hw: pointer to the HW structure
+ *
+ * Returns 0 (0) if successful, else returns -10
+ * (-IGC_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ * the master requests to be disabled.
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests.
+ */
+s32 igc_disable_pcie_master(struct igc_hw *hw)
+{
+ s32 timeout = MASTER_DISABLE_TIMEOUT;
+ s32 ret_val = 0;
+ u32 ctrl;
+
+ ctrl = rd32(IGC_CTRL);
+ ctrl |= IGC_CTRL_GIO_MASTER_DISABLE;
+ wr32(IGC_CTRL, ctrl);
+
+ while (timeout) {
+ if (!(rd32(IGC_STATUS) &
+ IGC_STATUS_GIO_MASTER_ENABLE))
+ break;
+ usleep_range(2000, 3000);
+ timeout--;
+ }
+
+ if (!timeout) {
+ hw_dbg("Master requests are pending.\n");
+ ret_val = -IGC_ERR_MASTER_REQUESTS_PENDING;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_init_rx_addrs - Initialize receive addresses
+ * @hw: pointer to the HW structure
+ * @rar_count: receive address registers
+ *
+ * Setup the receive address registers by setting the base receive address
+ * register to the devices MAC address and clearing all the other receive
+ * address registers to 0.
+ */
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count)
+{
+ u8 mac_addr[ETH_ALEN] = {0};
+ u32 i;
+
+ /* Setup the receive address */
+ hw_dbg("Programming MAC Address into RAR[0]\n");
+
+ hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+ /* Zero out the other (rar_entry_count - 1) receive addresses */
+ hw_dbg("Clearing RAR[1-%u]\n", rar_count - 1);
+ for (i = 1; i < rar_count; i++)
+ hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ * igc_setup_link - Setup flow control and link settings
+ * @hw: pointer to the HW structure
+ *
+ * Determines which flow control settings to use, then configures flow
+ * control. Calls the appropriate media-specific link configuration
+ * function. Assuming the adapter has a valid link partner, a valid link
+ * should be established. Assumes the hardware has previously been reset
+ * and the transmitter and receiver are not enabled.
+ */
+s32 igc_setup_link(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+
+ /* In the case of the phy reset being blocked, we already have a link.
+ * We do not need to set it up again.
+ */
+ if (igc_check_reset_block(hw))
+ goto out;
+
+ /* If requested flow control is set to default, set flow control
+ * based on the EEPROM flow control settings.
+ */
+ if (hw->fc.requested_mode == igc_fc_default) {
+ ret_val = igc_set_default_fc(hw);
+ if (ret_val)
+ goto out;
+ }
+
+ /* We want to save off the original Flow Control configuration just
+ * in case we get disconnected and then reconnected into a different
+ * hub or switch with different Flow Control capabilities.
+ */
+ hw->fc.current_mode = hw->fc.requested_mode;
+
+ hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+ /* Call the necessary media_type subroutine to configure the link. */
+ ret_val = hw->mac.ops.setup_physical_interface(hw);
+ if (ret_val)
+ goto out;
+
+ /* Initialize the flow control address, type, and PAUSE timer
+ * registers to their default values. This is done even if flow
+ * control is disabled, because it does not hurt anything to
+ * initialize these registers.
+ */
+ hw_dbg("Initializing the Flow Control address, type and timer regs\n");
+ wr32(IGC_FCT, FLOW_CONTROL_TYPE);
+ wr32(IGC_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+ wr32(IGC_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+ wr32(IGC_FCTTV, hw->fc.pause_time);
+
+ ret_val = igc_set_fc_watermarks(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_set_default_fc - Set flow control default values
+ * @hw: pointer to the HW structure
+ *
+ * Read the EEPROM for the default values for flow control and store the
+ * values.
+ */
+static s32 igc_set_default_fc(struct igc_hw *hw)
+{
+ hw->fc.requested_mode = igc_fc_full;
+ return 0;
+}
+
+/**
+ * igc_force_mac_fc - Force the MAC's flow control settings
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the
+ * device control register to reflect the adapter settings. TFCE and RFCE
+ * need to be explicitly set by software when a copper PHY is used because
+ * autonegotiation is managed by the PHY rather than the MAC. Software must
+ * also configure these bits when link is forced on a fiber connection.
+ */
+s32 igc_force_mac_fc(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+ u32 ctrl;
+
+ ctrl = rd32(IGC_CTRL);
+
+ /* Because we didn't get link via the internal auto-negotiation
+ * mechanism (we either forced link or we got link via PHY
+ * auto-neg), we have to manually enable/disable transmit an
+ * receive flow control.
+ *
+ * The "Case" statement below enables/disable flow control
+ * according to the "hw->fc.current_mode" parameter.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause
+ * frames but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * frames but we do not receive pause frames).
+ * 3: Both Rx and TX flow control (symmetric) is enabled.
+ * other: No other values should be possible at this point.
+ */
+ hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+ switch (hw->fc.current_mode) {
+ case igc_fc_none:
+ ctrl &= (~(IGC_CTRL_TFCE | IGC_CTRL_RFCE));
+ break;
+ case igc_fc_rx_pause:
+ ctrl &= (~IGC_CTRL_TFCE);
+ ctrl |= IGC_CTRL_RFCE;
+ break;
+ case igc_fc_tx_pause:
+ ctrl &= (~IGC_CTRL_RFCE);
+ ctrl |= IGC_CTRL_TFCE;
+ break;
+ case igc_fc_full:
+ ctrl |= (IGC_CTRL_TFCE | IGC_CTRL_RFCE);
+ break;
+ default:
+ hw_dbg("Flow control param set incorrectly\n");
+ ret_val = -IGC_ERR_CONFIG;
+ goto out;
+ }
+
+ wr32(IGC_CTRL, ctrl);
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_set_fc_watermarks - Set flow control high/low watermarks
+ * @hw: pointer to the HW structure
+ *
+ * Sets the flow control high/low threshold (watermark) registers. If
+ * flow control XON frame transmission is enabled, then set XON frame
+ * transmission as well.
+ */
+static s32 igc_set_fc_watermarks(struct igc_hw *hw)
+{
+ u32 fcrtl = 0, fcrth = 0;
+
+ /* Set the flow control receive threshold registers. Normally,
+ * these registers will be set to a default threshold that may be
+ * adjusted later by the driver's runtime code. However, if the
+ * ability to transmit pause frames is not enabled, then these
+ * registers will be set to 0.
+ */
+ if (hw->fc.current_mode & igc_fc_tx_pause) {
+ /* We need to set up the Receive Threshold high and low water
+ * marks as well as (optionally) enabling the transmission of
+ * XON frames.
+ */
+ fcrtl = hw->fc.low_water;
+ if (hw->fc.send_xon)
+ fcrtl |= IGC_FCRTL_XONE;
+
+ fcrth = hw->fc.high_water;
+ }
+ wr32(IGC_FCRTL, fcrtl);
+ wr32(IGC_FCRTH, fcrth);
+
+ return 0;
+}
+
+/**
+ * igc_clear_hw_cntrs_base - Clear base hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the base hardware counters by reading the counter registers.
+ */
+void igc_clear_hw_cntrs_base(struct igc_hw *hw)
+{
+ rd32(IGC_CRCERRS);
+ rd32(IGC_SYMERRS);
+ rd32(IGC_MPC);
+ rd32(IGC_SCC);
+ rd32(IGC_ECOL);
+ rd32(IGC_MCC);
+ rd32(IGC_LATECOL);
+ rd32(IGC_COLC);
+ rd32(IGC_DC);
+ rd32(IGC_SEC);
+ rd32(IGC_RLEC);
+ rd32(IGC_XONRXC);
+ rd32(IGC_XONTXC);
+ rd32(IGC_XOFFRXC);
+ rd32(IGC_XOFFTXC);
+ rd32(IGC_FCRUC);
+ rd32(IGC_GPRC);
+ rd32(IGC_BPRC);
+ rd32(IGC_MPRC);
+ rd32(IGC_GPTC);
+ rd32(IGC_GORCL);
+ rd32(IGC_GORCH);
+ rd32(IGC_GOTCL);
+ rd32(IGC_GOTCH);
+ rd32(IGC_RNBC);
+ rd32(IGC_RUC);
+ rd32(IGC_RFC);
+ rd32(IGC_ROC);
+ rd32(IGC_RJC);
+ rd32(IGC_TORL);
+ rd32(IGC_TORH);
+ rd32(IGC_TOTL);
+ rd32(IGC_TOTH);
+ rd32(IGC_TPR);
+ rd32(IGC_TPT);
+ rd32(IGC_MPTC);
+ rd32(IGC_BPTC);
+
+ rd32(IGC_PRC64);
+ rd32(IGC_PRC127);
+ rd32(IGC_PRC255);
+ rd32(IGC_PRC511);
+ rd32(IGC_PRC1023);
+ rd32(IGC_PRC1522);
+ rd32(IGC_PTC64);
+ rd32(IGC_PTC127);
+ rd32(IGC_PTC255);
+ rd32(IGC_PTC511);
+ rd32(IGC_PTC1023);
+ rd32(IGC_PTC1522);
+
+ rd32(IGC_ALGNERRC);
+ rd32(IGC_RXERRC);
+ rd32(IGC_TNCRS);
+ rd32(IGC_CEXTERR);
+ rd32(IGC_TSCTC);
+ rd32(IGC_TSCTFC);
+
+ rd32(IGC_MGTPRC);
+ rd32(IGC_MGTPDC);
+ rd32(IGC_MGTPTC);
+
+ rd32(IGC_IAC);
+ rd32(IGC_ICRXOC);
+
+ rd32(IGC_ICRXPTC);
+ rd32(IGC_ICRXATC);
+ rd32(IGC_ICTXPTC);
+ rd32(IGC_ICTXATC);
+ rd32(IGC_ICTXQEC);
+ rd32(IGC_ICTXQMTC);
+ rd32(IGC_ICRXDMTC);
+
+ rd32(IGC_CBTMPC);
+ rd32(IGC_HTDPMC);
+ rd32(IGC_CBRMPC);
+ rd32(IGC_RPTHC);
+ rd32(IGC_HGPTC);
+ rd32(IGC_HTCBDPC);
+ rd32(IGC_HGORCL);
+ rd32(IGC_HGORCH);
+ rd32(IGC_HGOTCL);
+ rd32(IGC_HGOTCH);
+ rd32(IGC_LENERRS);
+}
+
+/**
+ * igc_rar_set - Set receive address register
+ * @hw: pointer to the HW structure
+ * @addr: pointer to the receive address
+ * @index: receive address array register
+ *
+ * Sets the receive address array register at index to the address passed
+ * in by addr.
+ */
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index)
+{
+ u32 rar_low, rar_high;
+
+ /* HW expects these in little endian so we reverse the byte order
+ * from network order (big endian) to little endian
+ */
+ rar_low = ((u32)addr[0] |
+ ((u32)addr[1] << 8) |
+ ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+ rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+ /* If MAC address zero, no need to set the AV bit */
+ if (rar_low || rar_high)
+ rar_high |= IGC_RAH_AV;
+
+ /* Some bridges will combine consecutive 32-bit writes into
+ * a single burst write, which will malfunction on some parts.
+ * The flushes avoid this.
+ */
+ wr32(IGC_RAL(index), rar_low);
+ wrfl();
+ wr32(IGC_RAH(index), rar_high);
+ wrfl();
+}
+
+/**
+ * igc_check_for_copper_link - Check for link (Copper)
+ * @hw: pointer to the HW structure
+ *
+ * Checks to see of the link status of the hardware has changed. If a
+ * change in link status has been detected, then we read the PHY registers
+ * to get the current speed/duplex if link exists.
+ */
+s32 igc_check_for_copper_link(struct igc_hw *hw)
+{
+ struct igc_mac_info *mac = &hw->mac;
+ s32 ret_val;
+ bool link;
+
+ /* We only want to go out to the PHY registers to see if Auto-Neg
+ * has completed and/or if our link status has changed. The
+ * get_link_status flag is set upon receiving a Link Status
+ * Change or Rx Sequence Error interrupt.
+ */
+ if (!mac->get_link_status) {
+ ret_val = 0;
+ goto out;
+ }
+
+ /* First we want to see if the MII Status Register reports
+ * link. If so, then we want to get the current speed/duplex
+ * of the PHY.
+ */
+ ret_val = igc_phy_has_link(hw, 1, 0, &link);
+ if (ret_val)
+ goto out;
+
+ if (!link)
+ goto out; /* No link detected */
+
+ mac->get_link_status = false;
+
+ /* Check if there was DownShift, must be checked
+ * immediately after link-up
+ */
+ igc_check_downshift(hw);
+
+ /* If we are forcing speed/duplex, then we simply return since
+ * we have already determined whether we have link or not.
+ */
+ if (!mac->autoneg) {
+ ret_val = -IGC_ERR_CONFIG;
+ goto out;
+ }
+
+ /* Auto-Neg is enabled. Auto Speed Detection takes care
+ * of MAC speed/duplex configuration. So we only need to
+ * configure Collision Distance in the MAC.
+ */
+ igc_config_collision_dist(hw);
+
+ /* Configure Flow Control now that Auto-Neg has completed.
+ * First, we need to restore the desired flow control
+ * settings because we may have had to re-autoneg with a
+ * different link partner.
+ */
+ ret_val = igc_config_fc_after_link_up(hw);
+ if (ret_val)
+ hw_dbg("Error configuring flow control\n");
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_config_collision_dist - Configure collision distance
+ * @hw: pointer to the HW structure
+ *
+ * Configures the collision distance to the default value and is used
+ * during link setup. Currently no func pointer exists and all
+ * implementations are handled in the generic version of this function.
+ */
+void igc_config_collision_dist(struct igc_hw *hw)
+{
+ u32 tctl;
+
+ tctl = rd32(IGC_TCTL);
+
+ tctl &= ~IGC_TCTL_COLD;
+ tctl |= IGC_COLLISION_DISTANCE << IGC_COLD_SHIFT;
+
+ wr32(IGC_TCTL, tctl);
+ wrfl();
+}
+
+/**
+ * igc_config_fc_after_link_up - Configures flow control after link
+ * @hw: pointer to the HW structure
+ *
+ * Checks the status of auto-negotiation after link up to ensure that the
+ * speed and duplex were not forced. If the link needed to be forced, then
+ * flow control needs to be forced also. If auto-negotiation is enabled
+ * and did not fail, then we configure flow control based on our link
+ * partner.
+ */
+s32 igc_config_fc_after_link_up(struct igc_hw *hw)
+{
+ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+ struct igc_mac_info *mac = &hw->mac;
+ u16 speed, duplex;
+ s32 ret_val = 0;
+
+ /* Check for the case where we have fiber media and auto-neg failed
+ * so we had to force link. In this case, we need to force the
+ * configuration of the MAC to match the "fc" parameter.
+ */
+ if (mac->autoneg_failed) {
+ if (hw->phy.media_type == igc_media_type_copper)
+ ret_val = igc_force_mac_fc(hw);
+ }
+
+ if (ret_val) {
+ hw_dbg("Error forcing flow control settings\n");
+ goto out;
+ }
+
+ /* Check for the case where we have copper media and auto-neg is
+ * enabled. In this case, we need to check and see if Auto-Neg
+ * has completed, and if so, how the PHY and link partner has
+ * flow control configured.
+ */
+ if (hw->phy.media_type == igc_media_type_copper && mac->autoneg) {
+ /* Read the MII Status Register and check to see if AutoNeg
+ * has completed. We read this twice because this reg has
+ * some "sticky" (latched) bits.
+ */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+ &mii_status_reg);
+ if (ret_val)
+ goto out;
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+ &mii_status_reg);
+ if (ret_val)
+ goto out;
+
+ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
+ hw_dbg("Copper PHY and Auto Neg has not completed.\n");
+ goto out;
+ }
+
+ /* The AutoNeg process has completed, so we now need to
+ * read both the Auto Negotiation Advertisement
+ * Register (Address 4) and the Auto_Negotiation Base
+ * Page Ability Register (Address 5) to determine how
+ * flow control was negotiated.
+ */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
+ &mii_nway_adv_reg);
+ if (ret_val)
+ goto out;
+ ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
+ &mii_nway_lp_ability_reg);
+ if (ret_val)
+ goto out;
+ /* Two bits in the Auto Negotiation Advertisement Register
+ * (Address 4) and two bits in the Auto Negotiation Base
+ * Page Ability Register (Address 5) determine flow control
+ * for both the PHY and the link partner. The following
+ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+ * 1999, describes these PAUSE resolution bits and how flow
+ * control is determined based upon these settings.
+ * NOTE: DC = Don't Care
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+ *-------|---------|-------|---------|--------------------
+ * 0 | 0 | DC | DC | igc_fc_none
+ * 0 | 1 | 0 | DC | igc_fc_none
+ * 0 | 1 | 1 | 0 | igc_fc_none
+ * 0 | 1 | 1 | 1 | igc_fc_tx_pause
+ * 1 | 0 | 0 | DC | igc_fc_none
+ * 1 | DC | 1 | DC | igc_fc_full
+ * 1 | 1 | 0 | 0 | igc_fc_none
+ * 1 | 1 | 0 | 1 | igc_fc_rx_pause
+ *
+ * Are both PAUSE bits set to 1? If so, this implies
+ * Symmetric Flow Control is enabled at both ends. The
+ * ASM_DIR bits are irrelevant per the spec.
+ *
+ * For Symmetric Flow Control:
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | DC | 1 | DC | IGC_fc_full
+ *
+ */
+ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+ /* Now we need to check if the user selected RX ONLY
+ * of pause frames. In this case, we had to advertise
+ * FULL flow control because we could not advertise RX
+ * ONLY. Hence, we must now check to see if we need to
+ * turn OFF the TRANSMISSION of PAUSE frames.
+ */
+ if (hw->fc.requested_mode == igc_fc_full) {
+ hw->fc.current_mode = igc_fc_full;
+ hw_dbg("Flow Control = FULL.\n");
+ } else {
+ hw->fc.current_mode = igc_fc_rx_pause;
+ hw_dbg("Flow Control = RX PAUSE frames only.\n");
+ }
+ }
+
+ /* For receiving PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 0 | 1 | 1 | 1 | igc_fc_tx_pause
+ */
+ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc.current_mode = igc_fc_tx_pause;
+ hw_dbg("Flow Control = TX PAUSE frames only.\n");
+ }
+ /* For transmitting PAUSE frames ONLY.
+ *
+ * LOCAL DEVICE | LINK PARTNER
+ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+ *-------|---------|-------|---------|--------------------
+ * 1 | 1 | 0 | 1 | igc_fc_rx_pause
+ */
+ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+ hw->fc.current_mode = igc_fc_rx_pause;
+ hw_dbg("Flow Control = RX PAUSE frames only.\n");
+ }
+ /* Per the IEEE spec, at this point flow control should be
+ * disabled. However, we want to consider that we could
+ * be connected to a legacy switch that doesn't advertise
+ * desired flow control, but can be forced on the link
+ * partner. So if we advertised no flow control, that is
+ * what we will resolve to. If we advertised some kind of
+ * receive capability (Rx Pause Only or Full Flow Control)
+ * and the link partner advertised none, we will configure
+ * ourselves to enable Rx Flow Control only. We can do
+ * this safely for two reasons: If the link partner really
+ * didn't want flow control enabled, and we enable Rx, no
+ * harm done since we won't be receiving any PAUSE frames
+ * anyway. If the intent on the link partner was to have
+ * flow control enabled, then by us enabling RX only, we
+ * can at least receive pause frames and process them.
+ * This is a good idea because in most cases, since we are
+ * predominantly a server NIC, more times than not we will
+ * be asked to delay transmission of packets than asking
+ * our link partner to pause transmission of frames.
+ */
+ else if ((hw->fc.requested_mode == igc_fc_none) ||
+ (hw->fc.requested_mode == igc_fc_tx_pause) ||
+ (hw->fc.strict_ieee)) {
+ hw->fc.current_mode = igc_fc_none;
+ hw_dbg("Flow Control = NONE.\n");
+ } else {
+ hw->fc.current_mode = igc_fc_rx_pause;
+ hw_dbg("Flow Control = RX PAUSE frames only.\n");
+ }
+
+ /* Now we need to do one last check... If we auto-
+ * negotiated to HALF DUPLEX, flow control should not be
+ * enabled per IEEE 802.3 spec.
+ */
+ ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+ if (ret_val) {
+ hw_dbg("Error getting link speed and duplex\n");
+ goto out;
+ }
+
+ if (duplex == HALF_DUPLEX)
+ hw->fc.current_mode = igc_fc_none;
+
+ /* Now we call a subroutine to actually force the MAC
+ * controller to use the correct flow control settings.
+ */
+ ret_val = igc_force_mac_fc(hw);
+ if (ret_val) {
+ hw_dbg("Error forcing flow control settings\n");
+ goto out;
+ }
+ }
+
+out:
+ return 0;
+}
+
+/**
+ * igc_get_auto_rd_done - Check for auto read completion
+ * @hw: pointer to the HW structure
+ *
+ * Check EEPROM for Auto Read done bit.
+ */
+s32 igc_get_auto_rd_done(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+ s32 i = 0;
+
+ while (i < AUTO_READ_DONE_TIMEOUT) {
+ if (rd32(IGC_EECD) & IGC_EECD_AUTO_RD)
+ break;
+ usleep_range(1000, 2000);
+ i++;
+ }
+
+ if (i == AUTO_READ_DONE_TIMEOUT) {
+ hw_dbg("Auto read by HW from NVM has not completed.\n");
+ ret_val = -IGC_ERR_RESET;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Read the status register for the current speed/duplex and store the current
+ * speed and duplex for copper connections.
+ */
+s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
+ u16 *duplex)
+{
+ u32 status;
+
+ status = rd32(IGC_STATUS);
+ if (status & IGC_STATUS_SPEED_1000) {
+ /* For I225, STATUS will indicate 1G speed in both 1 Gbps
+ * and 2.5 Gbps link modes. An additional bit is used
+ * to differentiate between 1 Gbps and 2.5 Gbps.
+ */
+ if (hw->mac.type == igc_i225 &&
+ (status & IGC_STATUS_SPEED_2500)) {
+ *speed = SPEED_2500;
+ hw_dbg("2500 Mbs, ");
+ } else {
+ *speed = SPEED_1000;
+ hw_dbg("1000 Mbs, ");
+ }
+ } else if (status & IGC_STATUS_SPEED_100) {
+ *speed = SPEED_100;
+ hw_dbg("100 Mbs, ");
+ } else {
+ *speed = SPEED_10;
+ hw_dbg("10 Mbs, ");
+ }
+
+ if (status & IGC_STATUS_FD) {
+ *duplex = FULL_DUPLEX;
+ hw_dbg("Full Duplex\n");
+ } else {
+ *duplex = HALF_DUPLEX;
+ hw_dbg("Half Duplex\n");
+ }
+
+ return 0;
+}
+
+/**
+ * igc_put_hw_semaphore - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ */
+void igc_put_hw_semaphore(struct igc_hw *hw)
+{
+ u32 swsm;
+
+ swsm = rd32(IGC_SWSM);
+
+ swsm &= ~(IGC_SWSM_SMBI | IGC_SWSM_SWESMBI);
+
+ wr32(IGC_SWSM, swsm);
+}
+
+/**
+ * igc_enable_mng_pass_thru - Enable processing of ARP's
+ * @hw: pointer to the HW structure
+ *
+ * Verifies the hardware needs to leave interface enabled so that frames can
+ * be directed to and from the management interface.
+ */
+bool igc_enable_mng_pass_thru(struct igc_hw *hw)
+{
+ bool ret_val = false;
+ u32 fwsm, factps;
+ u32 manc;
+
+ if (!hw->mac.asf_firmware_present)
+ goto out;
+
+ manc = rd32(IGC_MANC);
+
+ if (!(manc & IGC_MANC_RCV_TCO_EN))
+ goto out;
+
+ if (hw->mac.arc_subsystem_valid) {
+ fwsm = rd32(IGC_FWSM);
+ factps = rd32(IGC_FACTPS);
+
+ if (!(factps & IGC_FACTPS_MNGCG) &&
+ ((fwsm & IGC_FWSM_MODE_MASK) ==
+ (igc_mng_mode_pt << IGC_FWSM_MODE_SHIFT))) {
+ ret_val = true;
+ goto out;
+ }
+ } else {
+ if ((manc & IGC_MANC_SMBUS_EN) &&
+ !(manc & IGC_MANC_ASF_EN)) {
+ ret_val = true;
+ goto out;
+ }
+ }
+
+out:
+ return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h
new file mode 100644
index 000000000000..782bc995badc
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_mac.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_MAC_H_
+#define _IGC_MAC_H_
+
+#include "igc_hw.h"
+#include "igc_phy.h"
+#include "igc_defines.h"
+
+#ifndef IGC_REMOVED
+#define IGC_REMOVED(a) (0)
+#endif /* IGC_REMOVED */
+
+/* forward declaration */
+s32 igc_disable_pcie_master(struct igc_hw *hw);
+s32 igc_check_for_copper_link(struct igc_hw *hw);
+s32 igc_config_fc_after_link_up(struct igc_hw *hw);
+s32 igc_force_mac_fc(struct igc_hw *hw);
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count);
+s32 igc_setup_link(struct igc_hw *hw);
+void igc_clear_hw_cntrs_base(struct igc_hw *hw);
+s32 igc_get_auto_rd_done(struct igc_hw *hw);
+void igc_put_hw_semaphore(struct igc_hw *hw);
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index);
+void igc_config_collision_dist(struct igc_hw *hw);
+
+s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
+ u16 *duplex);
+
+bool igc_enable_mng_pass_thru(struct igc_hw *hw);
+
+enum igc_mng_mode {
+ igc_mng_mode_none = 0,
+ igc_mng_mode_asf,
+ igc_mng_mode_pt,
+ igc_mng_mode_ipmi,
+ igc_mng_mode_host_if_only
+};
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
new file mode 100644
index 000000000000..9d85707e8a81
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -0,0 +1,3901 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/if_vlan.h>
+#include <linux/aer.h>
+
+#include "igc.h"
+#include "igc_hw.h"
+
+#define DRV_VERSION "0.0.1-k"
+#define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver"
+
+static int debug = -1;
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+char igc_driver_name[] = "igc";
+char igc_driver_version[] = DRV_VERSION;
+static const char igc_driver_string[] = DRV_SUMMARY;
+static const char igc_copyright[] =
+ "Copyright(c) 2018 Intel Corporation.";
+
+static const struct igc_info *igc_info_tbl[] = {
+ [board_base] = &igc_base_info,
+};
+
+static const struct pci_device_id igc_pci_tbl[] = {
+ { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
+ { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
+ /* required last entry */
+ {0, }
+};
+
+MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
+
+/* forward declaration */
+static void igc_clean_tx_ring(struct igc_ring *tx_ring);
+static int igc_sw_init(struct igc_adapter *);
+static void igc_configure(struct igc_adapter *adapter);
+static void igc_power_down_link(struct igc_adapter *adapter);
+static void igc_set_default_mac_filter(struct igc_adapter *adapter);
+static void igc_set_rx_mode(struct net_device *netdev);
+static void igc_write_itr(struct igc_q_vector *q_vector);
+static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector);
+static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx);
+static void igc_set_interrupt_capability(struct igc_adapter *adapter,
+ bool msix);
+static void igc_free_q_vectors(struct igc_adapter *adapter);
+static void igc_irq_disable(struct igc_adapter *adapter);
+static void igc_irq_enable(struct igc_adapter *adapter);
+static void igc_configure_msix(struct igc_adapter *adapter);
+static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *bi);
+
+enum latency_range {
+ lowest_latency = 0,
+ low_latency = 1,
+ bulk_latency = 2,
+ latency_invalid = 255
+};
+
+static void igc_reset(struct igc_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ struct igc_hw *hw = &adapter->hw;
+
+ hw->mac.ops.reset_hw(hw);
+
+ if (hw->mac.ops.init_hw(hw))
+ dev_err(&pdev->dev, "Hardware Error\n");
+
+ if (!netif_running(adapter->netdev))
+ igc_power_down_link(adapter);
+
+ igc_get_phy_info(hw);
+}
+
+/**
+ * igc_power_up_link - Power up the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igc_power_up_link(struct igc_adapter *adapter)
+{
+ igc_reset_phy(&adapter->hw);
+
+ if (adapter->hw.phy.media_type == igc_media_type_copper)
+ igc_power_up_phy_copper(&adapter->hw);
+
+ igc_setup_link(&adapter->hw);
+}
+
+/**
+ * igc_power_down_link - Power down the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igc_power_down_link(struct igc_adapter *adapter)
+{
+ if (adapter->hw.phy.media_type == igc_media_type_copper)
+ igc_power_down_phy_copper_base(&adapter->hw);
+}
+
+/**
+ * igc_release_hw_control - release control of the h/w to f/w
+ * @adapter: address of board private structure
+ *
+ * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that the
+ * driver is no longer loaded.
+ */
+static void igc_release_hw_control(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ u32 ctrl_ext;
+
+ /* Let firmware take over control of h/w */
+ ctrl_ext = rd32(IGC_CTRL_EXT);
+ wr32(IGC_CTRL_EXT,
+ ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igc_get_hw_control - get control of the h/w from f/w
+ * @adapter: address of board private structure
+ *
+ * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that
+ * the driver is loaded.
+ */
+static void igc_get_hw_control(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ u32 ctrl_ext;
+
+ /* Let firmware know the driver has taken over */
+ ctrl_ext = rd32(IGC_CTRL_EXT);
+ wr32(IGC_CTRL_EXT,
+ ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igc_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+static void igc_free_tx_resources(struct igc_ring *tx_ring)
+{
+ igc_clean_tx_ring(tx_ring);
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!tx_ring->desc)
+ return;
+
+ dma_free_coherent(tx_ring->dev, tx_ring->size,
+ tx_ring->desc, tx_ring->dma);
+
+ tx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void igc_free_all_tx_resources(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ igc_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ * igc_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ */
+static void igc_clean_tx_ring(struct igc_ring *tx_ring)
+{
+ u16 i = tx_ring->next_to_clean;
+ struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+
+ while (i != tx_ring->next_to_use) {
+ union igc_adv_tx_desc *eop_desc, *tx_desc;
+
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* check for eop_desc to determine the end of the packet */
+ eop_desc = tx_buffer->next_to_watch;
+ tx_desc = IGC_TX_DESC(tx_ring, i);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IGC_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ }
+ }
+
+ /* reset BQL for queue */
+ netdev_tx_reset_queue(txring_txq(tx_ring));
+
+ /* reset next_to_use and next_to_clean */
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+}
+
+/**
+ * igc_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ */
+static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ if (adapter->tx_ring[i])
+ igc_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+/**
+ * igc_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_tx_resources(struct igc_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ int size = 0;
+
+ size = sizeof(struct igc_tx_buffer) * tx_ring->count;
+ tx_ring->tx_buffer_info = vzalloc(size);
+ if (!tx_ring->tx_buffer_info)
+ goto err;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
+
+ if (!tx_ring->desc)
+ goto err;
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+
+ return 0;
+
+err:
+ vfree(tx_ring->tx_buffer_info);
+ dev_err(dev,
+ "Unable to allocate memory for the transmit descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ err = igc_setup_tx_resources(adapter->tx_ring[i]);
+ if (err) {
+ dev_err(&pdev->dev,
+ "Allocation for Tx Queue %u failed\n", i);
+ for (i--; i >= 0; i--)
+ igc_free_tx_resources(adapter->tx_ring[i]);
+ break;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * igc_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ */
+static void igc_clean_rx_ring(struct igc_ring *rx_ring)
+{
+ u16 i = rx_ring->next_to_clean;
+
+ if (rx_ring->skb)
+ dev_kfree_skb(rx_ring->skb);
+ rx_ring->skb = NULL;
+
+ /* Free all the Rx ring sk_buffs */
+ while (i != rx_ring->next_to_alloc) {
+ struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
+
+ /* Invalidate cache lines that may have been written to by
+ * device so that we avoid corrupting memory.
+ */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ buffer_info->dma,
+ buffer_info->page_offset,
+ igc_rx_bufsz(rx_ring),
+ DMA_FROM_DEVICE);
+
+ /* free resources associated with mapping */
+ dma_unmap_page_attrs(rx_ring->dev,
+ buffer_info->dma,
+ igc_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IGC_RX_DMA_ATTR);
+ __page_frag_cache_drain(buffer_info->page,
+ buffer_info->pagecnt_bias);
+
+ i++;
+ if (i == rx_ring->count)
+ i = 0;
+ }
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+}
+
+/**
+ * igc_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ */
+static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ if (adapter->rx_ring[i])
+ igc_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * igc_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+static void igc_free_rx_resources(struct igc_ring *rx_ring)
+{
+ igc_clean_rx_ring(rx_ring);
+
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!rx_ring->desc)
+ return;
+
+ dma_free_coherent(rx_ring->dev, rx_ring->size,
+ rx_ring->desc, rx_ring->dma);
+
+ rx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void igc_free_all_rx_resources(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ igc_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * igc_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_setup_rx_resources(struct igc_ring *rx_ring)
+{
+ struct device *dev = rx_ring->dev;
+ int size, desc_len;
+
+ size = sizeof(struct igc_rx_buffer) * rx_ring->count;
+ rx_ring->rx_buffer_info = vzalloc(size);
+ if (!rx_ring->rx_buffer_info)
+ goto err;
+
+ desc_len = sizeof(union igc_adv_rx_desc);
+
+ /* Round up to nearest 4K */
+ rx_ring->size = rx_ring->count * desc_len;
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+
+ if (!rx_ring->desc)
+ goto err;
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+
+ return 0;
+
+err:
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ dev_err(dev,
+ "Unable to allocate memory for the receive descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * igc_setup_all_rx_resources - wrapper to allocate Rx resources
+ * (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int i, err = 0;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ err = igc_setup_rx_resources(adapter->rx_ring[i]);
+ if (err) {
+ dev_err(&pdev->dev,
+ "Allocation for Rx Queue %u failed\n", i);
+ for (i--; i >= 0; i--)
+ igc_free_rx_resources(adapter->rx_ring[i]);
+ break;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * igc_configure_rx_ring - Configure a receive ring after Reset
+ * @adapter: board private structure
+ * @ring: receive ring to be configured
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ */
+static void igc_configure_rx_ring(struct igc_adapter *adapter,
+ struct igc_ring *ring)
+{
+ struct igc_hw *hw = &adapter->hw;
+ union igc_adv_rx_desc *rx_desc;
+ int reg_idx = ring->reg_idx;
+ u32 srrctl = 0, rxdctl = 0;
+ u64 rdba = ring->dma;
+
+ /* disable the queue */
+ wr32(IGC_RXDCTL(reg_idx), 0);
+
+ /* Set DMA base address registers */
+ wr32(IGC_RDBAL(reg_idx),
+ rdba & 0x00000000ffffffffULL);
+ wr32(IGC_RDBAH(reg_idx), rdba >> 32);
+ wr32(IGC_RDLEN(reg_idx),
+ ring->count * sizeof(union igc_adv_rx_desc));
+
+ /* initialize head and tail */
+ ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
+ wr32(IGC_RDH(reg_idx), 0);
+ writel(0, ring->tail);
+
+ /* reset next-to- use/clean to place SW in sync with hardware */
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+
+ /* set descriptor configuration */
+ srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+ srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+ wr32(IGC_SRRCTL(reg_idx), srrctl);
+
+ rxdctl |= IGC_RX_PTHRESH;
+ rxdctl |= IGC_RX_HTHRESH << 8;
+ rxdctl |= IGC_RX_WTHRESH << 16;
+
+ /* initialize rx_buffer_info */
+ memset(ring->rx_buffer_info, 0,
+ sizeof(struct igc_rx_buffer) * ring->count);
+
+ /* initialize Rx descriptor 0 */
+ rx_desc = IGC_RX_DESC(ring, 0);
+ rx_desc->wb.upper.length = 0;
+
+ /* enable receive descriptor fetching */
+ rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
+
+ wr32(IGC_RXDCTL(reg_idx), rxdctl);
+}
+
+/**
+ * igc_configure_rx - Configure receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ */
+static void igc_configure_rx(struct igc_adapter *adapter)
+{
+ int i;
+
+ /* Setup the HW Rx Head and Tail Descriptor Pointers and
+ * the Base and Length of the Rx Descriptor Ring
+ */
+ for (i = 0; i < adapter->num_rx_queues; i++)
+ igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
+}
+
+/**
+ * igc_configure_tx_ring - Configure transmit ring after Reset
+ * @adapter: board private structure
+ * @ring: tx ring to configure
+ *
+ * Configure a transmit ring after a reset.
+ */
+static void igc_configure_tx_ring(struct igc_adapter *adapter,
+ struct igc_ring *ring)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int reg_idx = ring->reg_idx;
+ u64 tdba = ring->dma;
+ u32 txdctl = 0;
+
+ /* disable the queue */
+ wr32(IGC_TXDCTL(reg_idx), 0);
+ wrfl();
+ mdelay(10);
+
+ wr32(IGC_TDLEN(reg_idx),
+ ring->count * sizeof(union igc_adv_tx_desc));
+ wr32(IGC_TDBAL(reg_idx),
+ tdba & 0x00000000ffffffffULL);
+ wr32(IGC_TDBAH(reg_idx), tdba >> 32);
+
+ ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
+ wr32(IGC_TDH(reg_idx), 0);
+ writel(0, ring->tail);
+
+ txdctl |= IGC_TX_PTHRESH;
+ txdctl |= IGC_TX_HTHRESH << 8;
+ txdctl |= IGC_TX_WTHRESH << 16;
+
+ txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
+ wr32(IGC_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ * igc_configure_tx - Configure transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ */
+static void igc_configure_tx(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
+}
+
+/**
+ * igc_setup_mrqc - configure the multiple receive queue control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_mrqc(struct igc_adapter *adapter)
+{
+}
+
+/**
+ * igc_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_rctl(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ u32 rctl;
+
+ rctl = rd32(IGC_RCTL);
+
+ rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
+ rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
+
+ rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
+ (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
+
+ /* enable stripping of CRC. Newer features require
+ * that the HW strips the CRC.
+ */
+ rctl |= IGC_RCTL_SECRC;
+
+ /* disable store bad packets and clear size bits. */
+ rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
+
+ /* enable LPE to allow for reception of jumbo frames */
+ rctl |= IGC_RCTL_LPE;
+
+ /* disable queue 0 to prevent tail write w/o re-config */
+ wr32(IGC_RXDCTL(0), 0);
+
+ /* This is useful for sniffing bad packets. */
+ if (adapter->netdev->features & NETIF_F_RXALL) {
+ /* UPE and MPE will be handled by normal PROMISC logic
+ * in set_rx_mode
+ */
+ rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
+ IGC_RCTL_BAM | /* RX All Bcast Pkts */
+ IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
+
+ rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
+ IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
+ }
+
+ wr32(IGC_RCTL, rctl);
+}
+
+/**
+ * igc_setup_tctl - configure the transmit control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_tctl(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ u32 tctl;
+
+ /* disable queue 0 which icould be enabled by default */
+ wr32(IGC_TXDCTL(0), 0);
+
+ /* Program the Transmit Control Register */
+ tctl = rd32(IGC_TCTL);
+ tctl &= ~IGC_TCTL_CT;
+ tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
+ (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
+
+ /* Enable transmits */
+ tctl |= IGC_TCTL_EN;
+
+ wr32(IGC_TCTL, tctl);
+}
+
+/**
+ * igc_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_set_mac(struct net_device *netdev, void *p)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct igc_hw *hw = &adapter->hw;
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+ /* set the correct pool for the new PF MAC address in entry 0 */
+ igc_set_default_mac_filter(adapter);
+
+ return 0;
+}
+
+static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
+{
+}
+
+static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+{
+ struct net_device *netdev = tx_ring->netdev;
+
+ netif_stop_subqueue(netdev, tx_ring->queue_index);
+
+ /* memory barriier comment */
+ smp_mb();
+
+ /* We need to check again in a case another CPU has just
+ * made room available.
+ */
+ if (igc_desc_unused(tx_ring) < size)
+ return -EBUSY;
+
+ /* A reprieve! */
+ netif_wake_subqueue(netdev, tx_ring->queue_index);
+
+ u64_stats_update_begin(&tx_ring->tx_syncp2);
+ tx_ring->tx_stats.restart_queue2++;
+ u64_stats_update_end(&tx_ring->tx_syncp2);
+
+ return 0;
+}
+
+static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+{
+ if (igc_desc_unused(tx_ring) >= size)
+ return 0;
+ return __igc_maybe_stop_tx(tx_ring, size);
+}
+
+static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+ /* set type for advanced descriptor with frame checksum insertion */
+ u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
+ IGC_ADVTXD_DCMD_DEXT |
+ IGC_ADVTXD_DCMD_IFCS;
+
+ return cmd_type;
+}
+
+static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
+ union igc_adv_tx_desc *tx_desc,
+ u32 tx_flags, unsigned int paylen)
+{
+ u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
+
+ /* insert L4 checksum */
+ olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
+ ((IGC_TXD_POPTS_TXSM << 8) /
+ IGC_TX_FLAGS_CSUM);
+
+ /* insert IPv4 checksum */
+ olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
+ (((IGC_TXD_POPTS_IXSM << 8)) /
+ IGC_TX_FLAGS_IPV4);
+
+ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static int igc_tx_map(struct igc_ring *tx_ring,
+ struct igc_tx_buffer *first,
+ const u8 hdr_len)
+{
+ struct sk_buff *skb = first->skb;
+ struct igc_tx_buffer *tx_buffer;
+ union igc_adv_tx_desc *tx_desc;
+ u32 tx_flags = first->tx_flags;
+ struct skb_frag_struct *frag;
+ u16 i = tx_ring->next_to_use;
+ unsigned int data_len, size;
+ dma_addr_t dma;
+ u32 cmd_type = igc_tx_cmd_type(skb, tx_flags);
+
+ tx_desc = IGC_TX_DESC(tx_ring, i);
+
+ igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
+
+ size = skb_headlen(skb);
+ data_len = skb->data_len;
+
+ dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+ tx_buffer = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buffer, len, size);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
+ tx_desc->read.cmd_type_len =
+ cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = IGC_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ dma += IGC_MAX_DATA_PER_TXD;
+ size -= IGC_MAX_DATA_PER_TXD;
+
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+ }
+
+ if (likely(!data_len))
+ break;
+
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+ i++;
+ tx_desc++;
+ if (i == tx_ring->count) {
+ tx_desc = IGC_TX_DESC(tx_ring, 0);
+ i = 0;
+ }
+ tx_desc->read.olinfo_status = 0;
+
+ size = skb_frag_size(frag);
+ data_len -= size;
+
+ dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+ size, DMA_TO_DEVICE);
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ }
+
+ /* write last descriptor with RS and EOP bits */
+ cmd_type |= size | IGC_TXD_DCMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+ netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+ /* set the timestamp */
+ first->time_stamp = jiffies;
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch. (Only applicable for weak-ordered
+ * memory model archs, such as IA-64).
+ *
+ * We also need this memory barrier to make certain all of the
+ * status bits have been updated before next_to_watch is written.
+ */
+ wmb();
+
+ /* set next_to_watch value indicating a packet is present */
+ first->next_to_watch = tx_desc;
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+
+ tx_ring->next_to_use = i;
+
+ /* Make sure there is space in the ring for the next send. */
+ igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+ writel(i, tx_ring->tail);
+
+ /* we need this if more than one processor can write to our tail
+ * at a time, it synchronizes IO on IA64/Altix systems
+ */
+ mmiowb();
+ }
+
+ return 0;
+dma_error:
+ dev_err(tx_ring->dev, "TX DMA map failed\n");
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+
+ /* clear dma mappings for failed tx_buffer_info map */
+ while (tx_buffer != first) {
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ if (i-- == 0)
+ i += tx_ring->count;
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ }
+
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ dev_kfree_skb_any(tx_buffer->skb);
+ tx_buffer->skb = NULL;
+
+ tx_ring->next_to_use = i;
+
+ return -1;
+}
+
+static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+ struct igc_ring *tx_ring)
+{
+ u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ __be16 protocol = vlan_get_protocol(skb);
+ struct igc_tx_buffer *first;
+ u32 tx_flags = 0;
+ unsigned short f;
+ u8 hdr_len = 0;
+
+ /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
+ * + 2 desc gap to keep tail from touching head,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+
+ if (igc_maybe_stop_tx(tx_ring, count + 3)) {
+ /* this is a hard error */
+ return NETDEV_TX_BUSY;
+ }
+
+ /* record the location of the first descriptor for this packet */
+ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ first->skb = skb;
+ first->bytecount = skb->len;
+ first->gso_segs = 1;
+
+ skb_tx_timestamp(skb);
+
+ /* record initial flags and protocol */
+ first->tx_flags = tx_flags;
+ first->protocol = protocol;
+
+ igc_tx_csum(tx_ring, first);
+
+ igc_tx_map(tx_ring, first, hdr_len);
+
+ return NETDEV_TX_OK;
+}
+
+static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
+ struct sk_buff *skb)
+{
+ unsigned int r_idx = skb->queue_mapping;
+
+ if (r_idx >= adapter->num_tx_queues)
+ r_idx = r_idx % adapter->num_tx_queues;
+
+ return adapter->tx_ring[r_idx];
+}
+
+static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb->len < 17) {
+ if (skb_padto(skb, 17))
+ return NETDEV_TX_OK;
+ skb->len = 17;
+ }
+
+ return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
+}
+
+static inline void igc_rx_hash(struct igc_ring *ring,
+ union igc_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ if (ring->netdev->features & NETIF_F_RXHASH)
+ skb_set_hash(skb,
+ le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+ PKT_HASH_TYPE_L3);
+}
+
+/**
+ * igc_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ */
+static void igc_process_skb_fields(struct igc_ring *rx_ring,
+ union igc_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ igc_rx_hash(rx_ring, rx_desc, skb);
+
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
+ const unsigned int size)
+{
+ struct igc_rx_buffer *rx_buffer;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+/**
+ * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ */
+static void igc_add_rx_frag(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *rx_buffer,
+ struct sk_buff *skb,
+ unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+ rx_buffer->page_offset ^= truesize;
+#else
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+ rx_buffer->page_offset += truesize;
+#endif
+}
+
+static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *rx_buffer,
+ union igc_adv_rx_desc *rx_desc,
+ unsigned int size)
+{
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size);
+#endif
+ struct sk_buff *skb;
+
+ /* prefetch first cache line of first page */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ /* build an skb around the page buffer */
+ skb = build_skb(va - IGC_SKB_PAD, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, IGC_SKB_PAD);
+ __skb_put(skb, size);
+
+ /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+
+ return skb;
+}
+
+static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *rx_buffer,
+ union igc_adv_rx_desc *rx_desc,
+ unsigned int size)
+{
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+ unsigned int headlen;
+ struct sk_buff *skb;
+
+ /* prefetch first cache line of first page */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IGC_RX_HDR_LEN)
+ headlen = eth_get_headlen(va, IGC_RX_HDR_LEN);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+ /* update all of the pointers */
+ size -= headlen;
+ if (size) {
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ (va + headlen) - page_address(rx_buffer->page),
+ size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+ } else {
+ rx_buffer->pagecnt_bias++;
+ }
+
+ return skb;
+}
+
+/**
+ * igc_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void igc_reuse_rx_page(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *old_buff)
+{
+ u16 nta = rx_ring->next_to_alloc;
+ struct igc_rx_buffer *new_buff;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* Transfer page from old buffer to new buffer.
+ * Move each member individually to avoid possible store
+ * forwarding stalls.
+ */
+ new_buff->dma = old_buff->dma;
+ new_buff->page = old_buff->page;
+ new_buff->page_offset = old_buff->page_offset;
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
+}
+
+static inline bool igc_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
+{
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
+
+ /* avoid re-using remote pages */
+ if (unlikely(igc_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+ return false;
+#else
+#define IGC_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
+
+ if (rx_buffer->page_offset > IGC_LAST_OFFSET)
+ return false;
+#endif
+
+ /* If we have drained the page fragment pool we need to update
+ * the pagecnt_bias and page count so that we fully restock the
+ * number of references the driver holds.
+ */
+ if (unlikely(!pagecnt_bias)) {
+ page_ref_add(page, USHRT_MAX);
+ rx_buffer->pagecnt_bias = USHRT_MAX;
+ }
+
+ return true;
+}
+
+/**
+ * igc_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ */
+static bool igc_is_non_eop(struct igc_ring *rx_ring,
+ union igc_adv_rx_desc *rx_desc)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(IGC_RX_DESC(rx_ring, ntc));
+
+ if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
+ return false;
+
+ return true;
+}
+
+/**
+ * igc_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool igc_cleanup_headers(struct igc_ring *rx_ring,
+ union igc_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ if (unlikely((igc_test_staterr(rx_desc,
+ IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) {
+ struct net_device *netdev = rx_ring->netdev;
+
+ if (!(netdev->features & NETIF_F_RXALL)) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+ }
+
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
+
+ return false;
+}
+
+static void igc_put_rx_buffer(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *rx_buffer)
+{
+ if (igc_can_reuse_rx_page(rx_buffer)) {
+ /* hand second half of page back to the ring */
+ igc_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+ IGC_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+}
+
+/**
+ * igc_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @adapter: address of board private structure
+ */
+static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
+{
+ union igc_adv_rx_desc *rx_desc;
+ u16 i = rx_ring->next_to_use;
+ struct igc_rx_buffer *bi;
+ u16 bufsz;
+
+ /* nothing to do */
+ if (!cleaned_count)
+ return;
+
+ rx_desc = IGC_RX_DESC(rx_ring, i);
+ bi = &rx_ring->rx_buffer_info[i];
+ i -= rx_ring->count;
+
+ bufsz = igc_rx_bufsz(rx_ring);
+
+ do {
+ if (!igc_alloc_mapped_page(rx_ring, bi))
+ break;
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+ bi->page_offset, bufsz,
+ DMA_FROM_DEVICE);
+
+ /* Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+ rx_desc++;
+ bi++;
+ i++;
+ if (unlikely(!i)) {
+ rx_desc = IGC_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_buffer_info;
+ i -= rx_ring->count;
+ }
+
+ /* clear the length for the next_to_use descriptor */
+ rx_desc->wb.upper.length = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ i += rx_ring->count;
+
+ if (rx_ring->next_to_use != i) {
+ /* record the next descriptor to use */
+ rx_ring->next_to_use = i;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(i, rx_ring->tail);
+ }
+}
+
+static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+{
+ unsigned int total_bytes = 0, total_packets = 0;
+ struct igc_ring *rx_ring = q_vector->rx.ring;
+ struct sk_buff *skb = rx_ring->skb;
+ u16 cleaned_count = igc_desc_unused(rx_ring);
+
+ while (likely(total_packets < budget)) {
+ union igc_adv_rx_desc *rx_desc;
+ struct igc_rx_buffer *rx_buffer;
+ unsigned int size;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
+ igc_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * descriptor has been written back
+ */
+ dma_rmb();
+
+ rx_buffer = igc_get_rx_buffer(rx_ring, size);
+
+ /* retrieve a buffer from the ring */
+ if (skb)
+ igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ else if (ring_uses_build_skb(rx_ring))
+ skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
+ else
+ skb = igc_construct_skb(rx_ring, rx_buffer,
+ rx_desc, size);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_failed++;
+ rx_buffer->pagecnt_bias++;
+ break;
+ }
+
+ igc_put_rx_buffer(rx_ring, rx_buffer);
+ cleaned_count++;
+
+ /* fetch next buffer in frame if non-eop */
+ if (igc_is_non_eop(rx_ring, rx_desc))
+ continue;
+
+ /* verify the packet layout is correct */
+ if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
+ skb = NULL;
+ continue;
+ }
+
+ /* probably a little skewed due to removing CRC */
+ total_bytes += skb->len;
+
+ /* populate checksum, timestamp, VLAN, and protocol */
+ igc_process_skb_fields(rx_ring, rx_desc, skb);
+
+ napi_gro_receive(&q_vector->napi, skb);
+
+ /* reset skb pointer */
+ skb = NULL;
+
+ /* update budget accounting */
+ total_packets++;
+ }
+
+ /* place incomplete frames back on ring for completion */
+ rx_ring->skb = skb;
+
+ u64_stats_update_begin(&rx_ring->rx_syncp);
+ rx_ring->rx_stats.packets += total_packets;
+ rx_ring->rx_stats.bytes += total_bytes;
+ u64_stats_update_end(&rx_ring->rx_syncp);
+ q_vector->rx.total_packets += total_packets;
+ q_vector->rx.total_bytes += total_bytes;
+
+ if (cleaned_count)
+ igc_alloc_rx_buffers(rx_ring, cleaned_count);
+
+ return total_packets;
+}
+
+static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
+}
+
+static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ /* alloc new page for storage */
+ page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ igc_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IGC_RX_DMA_ATTR);
+
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_page(page);
+
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = igc_rx_offset(rx_ring);
+ bi->pagecnt_bias = 1;
+
+ return true;
+}
+
+/**
+ * igc_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: pointer to q_vector containing needed info
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * returns true if ring is completely cleaned
+ */
+static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int budget = q_vector->tx.work_limit;
+ struct igc_ring *tx_ring = q_vector->tx.ring;
+ unsigned int i = tx_ring->next_to_clean;
+ struct igc_tx_buffer *tx_buffer;
+ union igc_adv_tx_desc *tx_desc;
+
+ if (test_bit(__IGC_DOWN, &adapter->state))
+ return true;
+
+ tx_buffer = &tx_ring->tx_buffer_info[i];
+ tx_desc = IGC_TX_DESC(tx_ring, i);
+ i -= tx_ring->count;
+
+ do {
+ union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+ /* if next_to_watch is not set then there is no work pending */
+ if (!eop_desc)
+ break;
+
+ /* prevent any other reads prior to eop_desc */
+ smp_rmb();
+
+ /* if DD is not set pending work has not been completed */
+ if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
+ break;
+
+ /* clear next_to_watch to prevent false hangs */
+ tx_buffer->next_to_watch = NULL;
+
+ /* update the statistics for this packet */
+ total_bytes += tx_buffer->bytecount;
+ total_packets += tx_buffer->gso_segs;
+
+ /* free the skb */
+ napi_consume_skb(tx_buffer->skb, napi_budget);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* clear tx_buffer data */
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ /* clear last DMA location and unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IGC_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len)) {
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+ }
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(!i)) {
+ i -= tx_ring->count;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IGC_TX_DESC(tx_ring, 0);
+ }
+
+ /* issue prefetch for next Tx descriptor */
+ prefetch(tx_desc);
+
+ /* update budget accounting */
+ budget--;
+ } while (likely(budget));
+
+ netdev_tx_completed_queue(txring_txq(tx_ring),
+ total_packets, total_bytes);
+
+ i += tx_ring->count;
+ tx_ring->next_to_clean = i;
+ u64_stats_update_begin(&tx_ring->tx_syncp);
+ tx_ring->tx_stats.bytes += total_bytes;
+ tx_ring->tx_stats.packets += total_packets;
+ u64_stats_update_end(&tx_ring->tx_syncp);
+ q_vector->tx.total_bytes += total_bytes;
+ q_vector->tx.total_packets += total_packets;
+
+ if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
+ struct igc_hw *hw = &adapter->hw;
+
+ /* Detect a transmit hang in hardware, this serializes the
+ * check with the clearing of time_stamp and movement of i
+ */
+ clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+ if (tx_buffer->next_to_watch &&
+ time_after(jiffies, tx_buffer->time_stamp +
+ (adapter->tx_timeout_factor * HZ)) &&
+ !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
+ /* detected Tx unit hang */
+ dev_err(tx_ring->dev,
+ "Detected Tx Unit Hang\n"
+ " Tx Queue <%d>\n"
+ " TDH <%x>\n"
+ " TDT <%x>\n"
+ " next_to_use <%x>\n"
+ " next_to_clean <%x>\n"
+ "buffer_info[next_to_clean]\n"
+ " time_stamp <%lx>\n"
+ " next_to_watch <%p>\n"
+ " jiffies <%lx>\n"
+ " desc.status <%x>\n",
+ tx_ring->queue_index,
+ rd32(IGC_TDH(tx_ring->reg_idx)),
+ readl(tx_ring->tail),
+ tx_ring->next_to_use,
+ tx_ring->next_to_clean,
+ tx_buffer->time_stamp,
+ tx_buffer->next_to_watch,
+ jiffies,
+ tx_buffer->next_to_watch->wb.status);
+ netif_stop_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
+
+ /* we are about to reset, no point in enabling stuff */
+ return true;
+ }
+ }
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+ if (unlikely(total_packets &&
+ netif_carrier_ok(tx_ring->netdev) &&
+ igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+ if (__netif_subqueue_stopped(tx_ring->netdev,
+ tx_ring->queue_index) &&
+ !(test_bit(__IGC_DOWN, &adapter->state))) {
+ netif_wake_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
+
+ u64_stats_update_begin(&tx_ring->tx_syncp);
+ tx_ring->tx_stats.restart_queue++;
+ u64_stats_update_end(&tx_ring->tx_syncp);
+ }
+ }
+
+ return !!budget;
+}
+
+/**
+ * igc_ioctl - I/O control method
+ * @netdev: network interface device structure
+ * @ifreq: frequency
+ * @cmd: command
+ */
+static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * igc_up - Open the interface and prepare it to handle traffic
+ * @adapter: board private structure
+ */
+static void igc_up(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int i = 0;
+
+ /* hardware has been reset, we need to reload some things */
+ igc_configure(adapter);
+
+ clear_bit(__IGC_DOWN, &adapter->state);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ napi_enable(&adapter->q_vector[i]->napi);
+
+ if (adapter->msix_entries)
+ igc_configure_msix(adapter);
+ else
+ igc_assign_vector(adapter->q_vector[0], 0);
+
+ /* Clear any pending interrupts. */
+ rd32(IGC_ICR);
+ igc_irq_enable(adapter);
+
+ netif_tx_start_all_queues(adapter->netdev);
+
+ /* start the watchdog. */
+ hw->mac.get_link_status = 1;
+ schedule_work(&adapter->watchdog_task);
+}
+
+/**
+ * igc_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ */
+static void igc_update_stats(struct igc_adapter *adapter)
+{
+}
+
+static void igc_nfc_filter_exit(struct igc_adapter *adapter)
+{
+}
+
+/**
+ * igc_down - Close the interface
+ * @adapter: board private structure
+ */
+static void igc_down(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct igc_hw *hw = &adapter->hw;
+ u32 tctl, rctl;
+ int i = 0;
+
+ set_bit(__IGC_DOWN, &adapter->state);
+
+ /* disable receives in the hardware */
+ rctl = rd32(IGC_RCTL);
+ wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
+ /* flush and sleep below */
+
+ igc_nfc_filter_exit(adapter);
+
+ /* set trans_start so we don't get spurious watchdogs during reset */
+ netif_trans_update(netdev);
+
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+
+ /* disable transmits in the hardware */
+ tctl = rd32(IGC_TCTL);
+ tctl &= ~IGC_TCTL_EN;
+ wr32(IGC_TCTL, tctl);
+ /* flush both disables and wait for them to finish */
+ wrfl();
+ usleep_range(10000, 20000);
+
+ igc_irq_disable(adapter);
+
+ adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ if (adapter->q_vector[i]) {
+ napi_synchronize(&adapter->q_vector[i]->napi);
+ napi_disable(&adapter->q_vector[i]->napi);
+ }
+ }
+
+ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ /* record the stats before reset*/
+ spin_lock(&adapter->stats64_lock);
+ igc_update_stats(adapter);
+ spin_unlock(&adapter->stats64_lock);
+
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+
+ if (!pci_channel_offline(adapter->pdev))
+ igc_reset(adapter);
+
+ /* clear VLAN promisc flag so VFTA will be updated if necessary */
+ adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
+
+ igc_clean_all_tx_rings(adapter);
+ igc_clean_all_rx_rings(adapter);
+}
+
+static void igc_reinit_locked(struct igc_adapter *adapter)
+{
+ WARN_ON(in_interrupt());
+ while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+ igc_down(adapter);
+ igc_up(adapter);
+ clear_bit(__IGC_RESETTING, &adapter->state);
+}
+
+static void igc_reset_task(struct work_struct *work)
+{
+ struct igc_adapter *adapter;
+
+ adapter = container_of(work, struct igc_adapter, reset_task);
+
+ netdev_err(adapter->netdev, "Reset adapter\n");
+ igc_reinit_locked(adapter);
+}
+
+/**
+ * igc_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct pci_dev *pdev = adapter->pdev;
+
+ /* adjust max frame to be at least the size of a standard frame */
+ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+ max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
+ while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+ usleep_range(1000, 2000);
+
+ /* igc_down has a dependency on max_frame_size */
+ adapter->max_frame_size = max_frame;
+
+ if (netif_running(netdev))
+ igc_down(adapter);
+
+ dev_info(&pdev->dev, "changing MTU from %d to %d\n",
+ netdev->mtu, new_mtu);
+ netdev->mtu = new_mtu;
+
+ if (netif_running(netdev))
+ igc_up(adapter);
+ else
+ igc_reset(adapter);
+
+ clear_bit(__IGC_RESETTING, &adapter->state);
+
+ return 0;
+}
+
+/**
+ * igc_get_stats - Get System Network Statistics
+ * @netdev: network interface device structure
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are updated here and also from the timer callback.
+ */
+static struct net_device_stats *igc_get_stats(struct net_device *netdev)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ if (!test_bit(__IGC_RESETTING, &adapter->state))
+ igc_update_stats(adapter);
+
+ /* only return the current stats */
+ return &netdev->stats;
+}
+
+/**
+ * igc_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ */
+static void igc_configure(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int i = 0;
+
+ igc_get_hw_control(adapter);
+ igc_set_rx_mode(netdev);
+
+ igc_setup_tctl(adapter);
+ igc_setup_mrqc(adapter);
+ igc_setup_rctl(adapter);
+
+ igc_configure_tx(adapter);
+ igc_configure_rx(adapter);
+
+ igc_rx_fifo_flush_base(&adapter->hw);
+
+ /* call igc_desc_unused which always leaves
+ * at least 1 descriptor unused to make sure
+ * next_to_use != next_to_clean
+ */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igc_ring *ring = adapter->rx_ring[i];
+
+ igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+ }
+}
+
+/**
+ * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
+ * @adapter: Pointer to adapter structure
+ * @index: Index of the RAR entry which need to be synced with MAC table
+ */
+static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
+{
+ u8 *addr = adapter->mac_table[index].addr;
+ struct igc_hw *hw = &adapter->hw;
+ u32 rar_low, rar_high;
+
+ /* HW expects these to be in network order when they are plugged
+ * into the registers which are little endian. In order to guarantee
+ * that ordering we need to do an leXX_to_cpup here in order to be
+ * ready for the byteswap that occurs with writel
+ */
+ rar_low = le32_to_cpup((__le32 *)(addr));
+ rar_high = le16_to_cpup((__le16 *)(addr + 4));
+
+ /* Indicate to hardware the Address is Valid. */
+ if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
+ if (is_valid_ether_addr(addr))
+ rar_high |= IGC_RAH_AV;
+
+ rar_high |= IGC_RAH_POOL_1 <<
+ adapter->mac_table[index].queue;
+ }
+
+ wr32(IGC_RAL(index), rar_low);
+ wrfl();
+ wr32(IGC_RAH(index), rar_high);
+ wrfl();
+}
+
+/* Set default MAC address for the PF in the first RAR entry */
+static void igc_set_default_mac_filter(struct igc_adapter *adapter)
+{
+ struct igc_mac_addr *mac_table = &adapter->mac_table[0];
+
+ ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
+ mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+
+ igc_rar_set_index(adapter, 0);
+}
+
+/**
+ * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ */
+static void igc_set_rx_mode(struct net_device *netdev)
+{
+}
+
+/**
+ * igc_msix_other - msix other interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t igc_msix_other(int irq, void *data)
+{
+ struct igc_adapter *adapter = data;
+ struct igc_hw *hw = &adapter->hw;
+ u32 icr = rd32(IGC_ICR);
+
+ /* reading ICR causes bit 31 of EICR to be cleared */
+ if (icr & IGC_ICR_DRSTA)
+ schedule_work(&adapter->reset_task);
+
+ if (icr & IGC_ICR_DOUTSYNC) {
+ /* HW is reporting DMA is out of sync */
+ adapter->stats.doosync++;
+ }
+
+ if (icr & IGC_ICR_LSC) {
+ hw->mac.get_link_status = 1;
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__IGC_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+ wr32(IGC_EIMS, adapter->eims_other);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * igc_write_ivar - configure ivar for given MSI-X vector
+ * @hw: pointer to the HW structure
+ * @msix_vector: vector number we are allocating to a given ring
+ * @index: row index of IVAR register to write within IVAR table
+ * @offset: column offset of in IVAR, should be multiple of 8
+ *
+ * The IVAR table consists of 2 columns,
+ * each containing an cause allocation for an Rx and Tx ring, and a
+ * variable number of rows depending on the number of queues supported.
+ */
+static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
+ int index, int offset)
+{
+ u32 ivar = array_rd32(IGC_IVAR0, index);
+
+ /* clear any bits that are currently set */
+ ivar &= ~((u32)0xFF << offset);
+
+ /* write vector and valid bit */
+ ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
+
+ array_wr32(IGC_IVAR0, index, ivar);
+}
+
+static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ struct igc_hw *hw = &adapter->hw;
+ int rx_queue = IGC_N0_QUEUE;
+ int tx_queue = IGC_N0_QUEUE;
+
+ if (q_vector->rx.ring)
+ rx_queue = q_vector->rx.ring->reg_idx;
+ if (q_vector->tx.ring)
+ tx_queue = q_vector->tx.ring->reg_idx;
+
+ switch (hw->mac.type) {
+ case igc_i225:
+ if (rx_queue > IGC_N0_QUEUE)
+ igc_write_ivar(hw, msix_vector,
+ rx_queue >> 1,
+ (rx_queue & 0x1) << 4);
+ if (tx_queue > IGC_N0_QUEUE)
+ igc_write_ivar(hw, msix_vector,
+ tx_queue >> 1,
+ ((tx_queue & 0x1) << 4) + 8);
+ q_vector->eims_value = BIT(msix_vector);
+ break;
+ default:
+ WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
+ break;
+ }
+
+ /* add q_vector eims value to global eims_enable_mask */
+ adapter->eims_enable_mask |= q_vector->eims_value;
+
+ /* configure q_vector to set itr on first interrupt */
+ q_vector->set_itr = 1;
+}
+
+/**
+ * igc_configure_msix - Configure MSI-X hardware
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ */
+static void igc_configure_msix(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int i, vector = 0;
+ u32 tmp;
+
+ adapter->eims_enable_mask = 0;
+
+ /* set vector for other causes, i.e. link changes */
+ switch (hw->mac.type) {
+ case igc_i225:
+ /* Turn on MSI-X capability first, or our settings
+ * won't stick. And it will take days to debug.
+ */
+ wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
+ IGC_GPIE_PBA | IGC_GPIE_EIAME |
+ IGC_GPIE_NSICR);
+
+ /* enable msix_other interrupt */
+ adapter->eims_other = BIT(vector);
+ tmp = (vector++ | IGC_IVAR_VALID) << 8;
+
+ wr32(IGC_IVAR_MISC, tmp);
+ break;
+ default:
+ /* do nothing, since nothing else supports MSI-X */
+ break;
+ } /* switch (hw->mac.type) */
+
+ adapter->eims_enable_mask |= adapter->eims_other;
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ igc_assign_vector(adapter->q_vector[i], vector++);
+
+ wrfl();
+}
+
+static irqreturn_t igc_msix_ring(int irq, void *data)
+{
+ struct igc_q_vector *q_vector = data;
+
+ /* Write the ITR value calculated from the previous interrupt. */
+ igc_write_itr(q_vector);
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * igc_request_msix - Initialize MSI-X interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ */
+static int igc_request_msix(struct igc_adapter *adapter)
+{
+ int i = 0, err = 0, vector = 0, free_vector = 0;
+ struct net_device *netdev = adapter->netdev;
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ &igc_msix_other, 0, netdev->name, adapter);
+ if (err)
+ goto err_out;
+
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ struct igc_q_vector *q_vector = adapter->q_vector[i];
+
+ vector++;
+
+ q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
+
+ if (q_vector->rx.ring && q_vector->tx.ring)
+ sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
+ q_vector->rx.ring->queue_index);
+ else if (q_vector->tx.ring)
+ sprintf(q_vector->name, "%s-tx-%u", netdev->name,
+ q_vector->tx.ring->queue_index);
+ else if (q_vector->rx.ring)
+ sprintf(q_vector->name, "%s-rx-%u", netdev->name,
+ q_vector->rx.ring->queue_index);
+ else
+ sprintf(q_vector->name, "%s-unused", netdev->name);
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ igc_msix_ring, 0, q_vector->name,
+ q_vector);
+ if (err)
+ goto err_free;
+ }
+
+ igc_configure_msix(adapter);
+ return 0;
+
+err_free:
+ /* free already assigned IRQs */
+ free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+ vector--;
+ for (i = 0; i < vector; i++) {
+ free_irq(adapter->msix_entries[free_vector++].vector,
+ adapter->q_vector[i]);
+ }
+err_out:
+ return err;
+}
+
+/**
+ * igc_reset_q_vector - Reset config for interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be reset
+ *
+ * If NAPI is enabled it will delete any references to the
+ * NAPI struct. This is preparation for igc_free_q_vector.
+ */
+static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+ /* if we're coming from igc_set_interrupt_capability, the vectors are
+ * not yet allocated
+ */
+ if (!q_vector)
+ return;
+
+ if (q_vector->tx.ring)
+ adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+ if (q_vector->rx.ring)
+ adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+ netif_napi_del(&q_vector->napi);
+}
+
+static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
+{
+ int v_idx = adapter->num_q_vectors;
+
+ if (adapter->msix_entries) {
+ pci_disable_msix(adapter->pdev);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+ } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
+ pci_disable_msi(adapter->pdev);
+ }
+
+ while (v_idx--)
+ igc_reset_q_vector(adapter, v_idx);
+}
+
+/**
+ * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * This function resets the device so that it has 0 rx queues, tx queues, and
+ * MSI-X interrupts allocated.
+ */
+static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
+{
+ igc_free_q_vectors(adapter);
+ igc_reset_interrupt_capability(adapter);
+}
+
+/**
+ * igc_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ */
+static void igc_free_q_vectors(struct igc_adapter *adapter)
+{
+ int v_idx = adapter->num_q_vectors;
+
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--) {
+ igc_reset_q_vector(adapter, v_idx);
+ igc_free_q_vector(adapter, v_idx);
+ }
+}
+
+/**
+ * igc_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.
+ */
+static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+ adapter->q_vector[v_idx] = NULL;
+
+ /* igc_get_stats64() might access the rings on this vector,
+ * we must wait a grace period before freeing it.
+ */
+ if (q_vector)
+ kfree_rcu(q_vector, rcu);
+}
+
+/* Need to wait a few seconds after link up to get diagnostic information from
+ * the phy
+ */
+static void igc_update_phy_info(struct timer_list *t)
+{
+ struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
+
+ igc_get_phy_info(&adapter->hw);
+}
+
+/**
+ * igc_has_link - check shared code for link and determine up/down
+ * @adapter: pointer to driver private info
+ */
+static bool igc_has_link(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ bool link_active = false;
+
+ /* get_link_status is set on LSC (link status) interrupt or
+ * rx sequence error interrupt. get_link_status will stay
+ * false until the igc_check_for_link establishes link
+ * for copper adapters ONLY
+ */
+ switch (hw->phy.media_type) {
+ case igc_media_type_copper:
+ if (!hw->mac.get_link_status)
+ return true;
+ hw->mac.ops.check_for_link(hw);
+ link_active = !hw->mac.get_link_status;
+ break;
+ default:
+ case igc_media_type_unknown:
+ break;
+ }
+
+ if (hw->mac.type == igc_i225 &&
+ hw->phy.id == I225_I_PHY_ID) {
+ if (!netif_carrier_ok(adapter->netdev)) {
+ adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+ } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
+ adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
+ adapter->link_check_timeout = jiffies;
+ }
+ }
+
+ return link_active;
+}
+
+/**
+ * igc_watchdog - Timer Call-back
+ * @data: pointer to adapter cast into an unsigned long
+ */
+static void igc_watchdog(struct timer_list *t)
+{
+ struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+ /* Do the rest outside of interrupt context */
+ schedule_work(&adapter->watchdog_task);
+}
+
+static void igc_watchdog_task(struct work_struct *work)
+{
+ struct igc_adapter *adapter = container_of(work,
+ struct igc_adapter,
+ watchdog_task);
+ struct net_device *netdev = adapter->netdev;
+ struct igc_hw *hw = &adapter->hw;
+ struct igc_phy_info *phy = &hw->phy;
+ u16 phy_data, retry_count = 20;
+ u32 connsw;
+ u32 link;
+ int i;
+
+ link = igc_has_link(adapter);
+
+ if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
+ if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+ adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+ else
+ link = false;
+ }
+
+ /* Force link down if we have fiber to swap to */
+ if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+ if (hw->phy.media_type == igc_media_type_copper) {
+ connsw = rd32(IGC_CONNSW);
+ if (!(connsw & IGC_CONNSW_AUTOSENSE_EN))
+ link = 0;
+ }
+ }
+ if (link) {
+ if (!netif_carrier_ok(netdev)) {
+ u32 ctrl;
+
+ hw->mac.ops.get_speed_and_duplex(hw,
+ &adapter->link_speed,
+ &adapter->link_duplex);
+
+ ctrl = rd32(IGC_CTRL);
+ /* Link status message must follow this format */
+ netdev_info(netdev,
+ "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+ netdev->name,
+ adapter->link_speed,
+ adapter->link_duplex == FULL_DUPLEX ?
+ "Full" : "Half",
+ (ctrl & IGC_CTRL_TFCE) &&
+ (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
+ (ctrl & IGC_CTRL_RFCE) ? "RX" :
+ (ctrl & IGC_CTRL_TFCE) ? "TX" : "None");
+
+ /* check if SmartSpeed worked */
+ igc_check_downshift(hw);
+ if (phy->speed_downgraded)
+ netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
+
+ /* adjust timeout factor according to speed/duplex */
+ adapter->tx_timeout_factor = 1;
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ adapter->tx_timeout_factor = 14;
+ break;
+ case SPEED_100:
+ /* maybe add some timeout factor ? */
+ break;
+ }
+
+ if (adapter->link_speed != SPEED_1000)
+ goto no_wait;
+
+ /* wait for Remote receiver status OK */
+retry_read_status:
+ if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
+ &phy_data)) {
+ if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+ retry_count) {
+ msleep(100);
+ retry_count--;
+ goto retry_read_status;
+ } else if (!retry_count) {
+ dev_err(&adapter->pdev->dev, "exceed max 2 second\n");
+ }
+ } else {
+ dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n");
+ }
+no_wait:
+ netif_carrier_on(netdev);
+
+ /* link state has changed, schedule phy info update */
+ if (!test_bit(__IGC_DOWN, &adapter->state))
+ mod_timer(&adapter->phy_info_timer,
+ round_jiffies(jiffies + 2 * HZ));
+ }
+ } else {
+ if (netif_carrier_ok(netdev)) {
+ adapter->link_speed = 0;
+ adapter->link_duplex = 0;
+
+ /* Links status message must follow this format */
+ netdev_info(netdev, "igc: %s NIC Link is Down\n",
+ netdev->name);
+ netif_carrier_off(netdev);
+
+ /* link state has changed, schedule phy info update */
+ if (!test_bit(__IGC_DOWN, &adapter->state))
+ mod_timer(&adapter->phy_info_timer,
+ round_jiffies(jiffies + 2 * HZ));
+
+ /* link is down, time to check for alternate media */
+ if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+ if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+ schedule_work(&adapter->reset_task);
+ /* return immediately */
+ return;
+ }
+ }
+
+ /* also check for alternate media here */
+ } else if (!netif_carrier_ok(netdev) &&
+ (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
+ if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+ schedule_work(&adapter->reset_task);
+ /* return immediately */
+ return;
+ }
+ }
+ }
+
+ spin_lock(&adapter->stats64_lock);
+ igc_update_stats(adapter);
+ spin_unlock(&adapter->stats64_lock);
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+ if (!netif_carrier_ok(netdev)) {
+ /* We've lost link, so the controller stops DMA,
+ * but we've got queued Tx work that's never going
+ * to get done, so reset controller to flush Tx.
+ * (Do the reset outside of interrupt context).
+ */
+ if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
+ adapter->tx_timeout_count++;
+ schedule_work(&adapter->reset_task);
+ /* return immediately since reset is imminent */
+ return;
+ }
+ }
+
+ /* Force detection of hung controller every watchdog period */
+ set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+ }
+
+ /* Cause software interrupt to ensure Rx ring is cleaned */
+ if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+ u32 eics = 0;
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ eics |= adapter->q_vector[i]->eims_value;
+ wr32(IGC_EICS, eics);
+ } else {
+ wr32(IGC_ICS, IGC_ICS_RXDMT0);
+ }
+
+ /* Reset the timer */
+ if (!test_bit(__IGC_DOWN, &adapter->state)) {
+ if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
+ mod_timer(&adapter->watchdog_timer,
+ round_jiffies(jiffies + HZ));
+ else
+ mod_timer(&adapter->watchdog_timer,
+ round_jiffies(jiffies + 2 * HZ));
+ }
+}
+
+/**
+ * igc_update_ring_itr - update the dynamic ITR value based on packet size
+ * @q_vector: pointer to q_vector
+ *
+ * Stores a new ITR value based on strictly on packet size. This
+ * algorithm is less sophisticated than that used in igc_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings. The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
+ * NOTE: This function is called only when operating in a multiqueue
+ * receive environment.
+ */
+static void igc_update_ring_itr(struct igc_q_vector *q_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ int new_val = q_vector->itr_val;
+ int avg_wire_size = 0;
+ unsigned int packets;
+
+ /* For non-gigabit speeds, just fix the interrupt rate at 4000
+ * ints/sec - ITR timer value of 120 ticks.
+ */
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ new_val = IGC_4K_ITR;
+ goto set_itr_val;
+ default:
+ break;
+ }
+
+ packets = q_vector->rx.total_packets;
+ if (packets)
+ avg_wire_size = q_vector->rx.total_bytes / packets;
+
+ packets = q_vector->tx.total_packets;
+ if (packets)
+ avg_wire_size = max_t(u32, avg_wire_size,
+ q_vector->tx.total_bytes / packets);
+
+ /* if avg_wire_size isn't set no work was done */
+ if (!avg_wire_size)
+ goto clear_counts;
+
+ /* Add 24 bytes to size to account for CRC, preamble, and gap */
+ avg_wire_size += 24;
+
+ /* Don't starve jumbo frames */
+ avg_wire_size = min(avg_wire_size, 3000);
+
+ /* Give a little boost to mid-size frames */
+ if (avg_wire_size > 300 && avg_wire_size < 1200)
+ new_val = avg_wire_size / 3;
+ else
+ new_val = avg_wire_size / 2;
+
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (new_val < IGC_20K_ITR &&
+ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+ (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+ new_val = IGC_20K_ITR;
+
+set_itr_val:
+ if (new_val != q_vector->itr_val) {
+ q_vector->itr_val = new_val;
+ q_vector->set_itr = 1;
+ }
+clear_counts:
+ q_vector->rx.total_bytes = 0;
+ q_vector->rx.total_packets = 0;
+ q_vector->tx.total_bytes = 0;
+ q_vector->tx.total_packets = 0;
+}
+
+/**
+ * igc_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ * NOTE: These calculations are only valid when operating in a single-
+ * queue environment.
+ */
+static void igc_update_itr(struct igc_q_vector *q_vector,
+ struct igc_ring_container *ring_container)
+{
+ unsigned int packets = ring_container->total_packets;
+ unsigned int bytes = ring_container->total_bytes;
+ u8 itrval = ring_container->itr;
+
+ /* no packets, exit with status unchanged */
+ if (packets == 0)
+ return;
+
+ switch (itrval) {
+ case lowest_latency:
+ /* handle TSO and jumbo frames */
+ if (bytes / packets > 8000)
+ itrval = bulk_latency;
+ else if ((packets < 5) && (bytes > 512))
+ itrval = low_latency;
+ break;
+ case low_latency: /* 50 usec aka 20000 ints/s */
+ if (bytes > 10000) {
+ /* this if handles the TSO accounting */
+ if (bytes / packets > 8000)
+ itrval = bulk_latency;
+ else if ((packets < 10) || ((bytes / packets) > 1200))
+ itrval = bulk_latency;
+ else if ((packets > 35))
+ itrval = lowest_latency;
+ } else if (bytes / packets > 2000) {
+ itrval = bulk_latency;
+ } else if (packets <= 2 && bytes < 512) {
+ itrval = lowest_latency;
+ }
+ break;
+ case bulk_latency: /* 250 usec aka 4000 ints/s */
+ if (bytes > 25000) {
+ if (packets > 35)
+ itrval = low_latency;
+ } else if (bytes < 1500) {
+ itrval = low_latency;
+ }
+ break;
+ }
+
+ /* clear work counters since we have the values we need */
+ ring_container->total_bytes = 0;
+ ring_container->total_packets = 0;
+
+ /* write updated itr to ring container */
+ ring_container->itr = itrval;
+}
+
+/**
+ * igc_intr_msi - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ */
+static irqreturn_t igc_intr_msi(int irq, void *data)
+{
+ struct igc_adapter *adapter = data;
+ struct igc_q_vector *q_vector = adapter->q_vector[0];
+ struct igc_hw *hw = &adapter->hw;
+ /* read ICR disables interrupts using IAM */
+ u32 icr = rd32(IGC_ICR);
+
+ igc_write_itr(q_vector);
+
+ if (icr & IGC_ICR_DRSTA)
+ schedule_work(&adapter->reset_task);
+
+ if (icr & IGC_ICR_DOUTSYNC) {
+ /* HW is reporting DMA is out of sync */
+ adapter->stats.doosync++;
+ }
+
+ if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
+ hw->mac.get_link_status = 1;
+ if (!test_bit(__IGC_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * igc_intr - Legacy Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ */
+static irqreturn_t igc_intr(int irq, void *data)
+{
+ struct igc_adapter *adapter = data;
+ struct igc_q_vector *q_vector = adapter->q_vector[0];
+ struct igc_hw *hw = &adapter->hw;
+ /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
+ * need for the IMC write
+ */
+ u32 icr = rd32(IGC_ICR);
+
+ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+ * not set, then the adapter didn't send an interrupt
+ */
+ if (!(icr & IGC_ICR_INT_ASSERTED))
+ return IRQ_NONE;
+
+ igc_write_itr(q_vector);
+
+ if (icr & IGC_ICR_DRSTA)
+ schedule_work(&adapter->reset_task);
+
+ if (icr & IGC_ICR_DOUTSYNC) {
+ /* HW is reporting DMA is out of sync */
+ adapter->stats.doosync++;
+ }
+
+ if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
+ hw->mac.get_link_status = 1;
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__IGC_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
+
+ napi_schedule(&q_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+static void igc_set_itr(struct igc_q_vector *q_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ u32 new_itr = q_vector->itr_val;
+ u8 current_itr = 0;
+
+ /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ current_itr = 0;
+ new_itr = IGC_4K_ITR;
+ goto set_itr_now;
+ default:
+ break;
+ }
+
+ igc_update_itr(q_vector, &q_vector->tx);
+ igc_update_itr(q_vector, &q_vector->rx);
+
+ current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (current_itr == lowest_latency &&
+ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+ (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+ current_itr = low_latency;
+
+ switch (current_itr) {
+ /* counts and packets in update_itr are dependent on these numbers */
+ case lowest_latency:
+ new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
+ break;
+ case low_latency:
+ new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
+ break;
+ case bulk_latency:
+ new_itr = IGC_4K_ITR; /* 4,000 ints/sec */
+ break;
+ default:
+ break;
+ }
+
+set_itr_now:
+ if (new_itr != q_vector->itr_val) {
+ /* this attempts to bias the interrupt rate towards Bulk
+ * by adding intermediate steps when interrupt rate is
+ * increasing
+ */
+ new_itr = new_itr > q_vector->itr_val ?
+ max((new_itr * q_vector->itr_val) /
+ (new_itr + (q_vector->itr_val >> 2)),
+ new_itr) : new_itr;
+ /* Don't write the value here; it resets the adapter's
+ * internal timer, and causes us to delay far longer than
+ * we should between interrupts. Instead, we write the ITR
+ * value at the beginning of the next interrupt so the timing
+ * ends up being correct.
+ */
+ q_vector->itr_val = new_itr;
+ q_vector->set_itr = 1;
+ }
+}
+
+static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ struct igc_hw *hw = &adapter->hw;
+
+ if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+ (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+ if (adapter->num_q_vectors == 1)
+ igc_set_itr(q_vector);
+ else
+ igc_update_ring_itr(q_vector);
+ }
+
+ if (!test_bit(__IGC_DOWN, &adapter->state)) {
+ if (adapter->msix_entries)
+ wr32(IGC_EIMS, q_vector->eims_value);
+ else
+ igc_irq_enable(adapter);
+ }
+}
+
+/**
+ * igc_poll - NAPI Rx polling callback
+ * @napi: napi polling structure
+ * @budget: count of how many packets we should handle
+ */
+static int igc_poll(struct napi_struct *napi, int budget)
+{
+ struct igc_q_vector *q_vector = container_of(napi,
+ struct igc_q_vector,
+ napi);
+ bool clean_complete = true;
+ int work_done = 0;
+
+ if (q_vector->tx.ring)
+ clean_complete = igc_clean_tx_irq(q_vector, budget);
+
+ if (q_vector->rx.ring) {
+ int cleaned = igc_clean_rx_irq(q_vector, budget);
+
+ work_done += cleaned;
+ if (cleaned >= budget)
+ clean_complete = false;
+ }
+
+ /* If all work not completed, return budget and keep polling */
+ if (!clean_complete)
+ return budget;
+
+ /* If not enough Rx work done, exit the polling mode */
+ napi_complete_done(napi, work_done);
+ igc_ring_irq_enable(q_vector);
+
+ return 0;
+}
+
+/**
+ * igc_set_interrupt_capability - set MSI or MSI-X if supported
+ * @adapter: Pointer to adapter structure
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static void igc_set_interrupt_capability(struct igc_adapter *adapter,
+ bool msix)
+{
+ int numvecs, i;
+ int err;
+
+ if (!msix)
+ goto msi_only;
+ adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+ /* Number of supported queues. */
+ adapter->num_rx_queues = adapter->rss_queues;
+
+ adapter->num_tx_queues = adapter->rss_queues;
+
+ /* start with one vector for every Rx queue */
+ numvecs = adapter->num_rx_queues;
+
+ /* if Tx handler is separate add 1 for every Tx queue */
+ if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
+ numvecs += adapter->num_tx_queues;
+
+ /* store the number of vectors reserved for queues */
+ adapter->num_q_vectors = numvecs;
+
+ /* add 1 vector for link status interrupts */
+ numvecs++;
+
+ adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
+ GFP_KERNEL);
+
+ if (!adapter->msix_entries)
+ return;
+
+ /* populate entry values */
+ for (i = 0; i < numvecs; i++)
+ adapter->msix_entries[i].entry = i;
+
+ err = pci_enable_msix_range(adapter->pdev,
+ adapter->msix_entries,
+ numvecs,
+ numvecs);
+ if (err > 0)
+ return;
+
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+
+ igc_reset_interrupt_capability(adapter);
+
+msi_only:
+ adapter->flags &= ~IGC_FLAG_HAS_MSIX;
+
+ adapter->rss_queues = 1;
+ adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+ adapter->num_rx_queues = 1;
+ adapter->num_tx_queues = 1;
+ adapter->num_q_vectors = 1;
+ if (!pci_enable_msi(adapter->pdev))
+ adapter->flags |= IGC_FLAG_HAS_MSI;
+}
+
+static void igc_add_ring(struct igc_ring *ring,
+ struct igc_ring_container *head)
+{
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * igc_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int igc_alloc_q_vector(struct igc_adapter *adapter,
+ unsigned int v_count, unsigned int v_idx,
+ unsigned int txr_count, unsigned int txr_idx,
+ unsigned int rxr_count, unsigned int rxr_idx)
+{
+ struct igc_q_vector *q_vector;
+ struct igc_ring *ring;
+ int ring_count, size;
+
+ /* igc only supports 1 Tx and/or 1 Rx queue per vector */
+ if (txr_count > 1 || rxr_count > 1)
+ return -ENOMEM;
+
+ ring_count = txr_count + rxr_count;
+ size = sizeof(struct igc_q_vector) +
+ (sizeof(struct igc_ring) * ring_count);
+
+ /* allocate q_vector and rings */
+ q_vector = adapter->q_vector[v_idx];
+ if (!q_vector)
+ q_vector = kzalloc(size, GFP_KERNEL);
+ else
+ memset(q_vector, 0, size);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi,
+ igc_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+
+ /* initialize work limits */
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+
+ /* initialize ITR configuration */
+ q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
+ q_vector->itr_val = IGC_START_ITR;
+
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+ /* initialize ITR */
+ if (rxr_count) {
+ /* rx or rx/tx vector */
+ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+ q_vector->itr_val = adapter->rx_itr_setting;
+ } else {
+ /* tx only vector */
+ if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+ q_vector->itr_val = adapter->tx_itr_setting;
+ }
+
+ if (txr_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ igc_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
+ ring->count = adapter->tx_ring_count;
+ ring->queue_index = txr_idx;
+
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ if (rxr_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ igc_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
+ ring->count = adapter->rx_ring_count;
+ ring->queue_index = rxr_idx;
+
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
+ }
+
+ return 0;
+}
+
+/**
+ * igc_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+static int igc_alloc_q_vectors(struct igc_adapter *adapter)
+{
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int q_vectors = adapter->num_q_vectors;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++) {
+ err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+ 0, 0, 1, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining--;
+ rxr_idx++;
+ }
+ }
+
+ for (; v_idx < q_vectors; v_idx++) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
+ err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+ tqpv, txr_idx, rqpv, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ txr_remaining -= tqpv;
+ rxr_idx++;
+ txr_idx++;
+ }
+
+ return 0;
+
+err_out:
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--)
+ igc_free_q_vector(adapter, v_idx);
+
+ return -ENOMEM;
+}
+
+/**
+ * igc_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ */
+static void igc_cache_ring_register(struct igc_adapter *adapter)
+{
+ int i = 0, j = 0;
+
+ switch (adapter->hw.mac.type) {
+ case igc_i225:
+ /* Fall through */
+ default:
+ for (; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->reg_idx = i;
+ for (; j < adapter->num_tx_queues; j++)
+ adapter->tx_ring[j]->reg_idx = j;
+ break;
+ }
+}
+
+/**
+ * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ * @adapter: Pointer to adapter structure
+ *
+ * This function initializes the interrupts and allocates all of the queues.
+ */
+static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int err = 0;
+
+ igc_set_interrupt_capability(adapter, msix);
+
+ err = igc_alloc_q_vectors(adapter);
+ if (err) {
+ dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
+ goto err_alloc_q_vectors;
+ }
+
+ igc_cache_ring_register(adapter);
+
+ return 0;
+
+err_alloc_q_vectors:
+ igc_reset_interrupt_capability(adapter);
+ return err;
+}
+
+static void igc_free_irq(struct igc_adapter *adapter)
+{
+ if (adapter->msix_entries) {
+ int vector = 0, i;
+
+ free_irq(adapter->msix_entries[vector++].vector, adapter);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ free_irq(adapter->msix_entries[vector++].vector,
+ adapter->q_vector[i]);
+ } else {
+ free_irq(adapter->pdev->irq, adapter);
+ }
+}
+
+/**
+ * igc_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ */
+static void igc_irq_disable(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+
+ if (adapter->msix_entries) {
+ u32 regval = rd32(IGC_EIAM);
+
+ wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
+ wr32(IGC_EIMC, adapter->eims_enable_mask);
+ regval = rd32(IGC_EIAC);
+ wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
+ }
+
+ wr32(IGC_IAM, 0);
+ wr32(IGC_IMC, ~0);
+ wrfl();
+
+ if (adapter->msix_entries) {
+ int vector = 0, i;
+
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+ } else {
+ synchronize_irq(adapter->pdev->irq);
+ }
+}
+
+/**
+ * igc_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ */
+static void igc_irq_enable(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+
+ if (adapter->msix_entries) {
+ u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
+ u32 regval = rd32(IGC_EIAC);
+
+ wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
+ regval = rd32(IGC_EIAM);
+ wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
+ wr32(IGC_EIMS, adapter->eims_enable_mask);
+ wr32(IGC_IMS, ims);
+ } else {
+ wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+ wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+ }
+}
+
+/**
+ * igc_request_irq - initialize interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static int igc_request_irq(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ int err = 0;
+
+ if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+ err = igc_request_msix(adapter);
+ if (!err)
+ goto request_done;
+ /* fall back to MSI */
+ igc_free_all_tx_resources(adapter);
+ igc_free_all_rx_resources(adapter);
+
+ igc_clear_interrupt_scheme(adapter);
+ err = igc_init_interrupt_scheme(adapter, false);
+ if (err)
+ goto request_done;
+ igc_setup_all_tx_resources(adapter);
+ igc_setup_all_rx_resources(adapter);
+ igc_configure(adapter);
+ }
+
+ igc_assign_vector(adapter->q_vector[0], 0);
+
+ if (adapter->flags & IGC_FLAG_HAS_MSI) {
+ err = request_irq(pdev->irq, &igc_intr_msi, 0,
+ netdev->name, adapter);
+ if (!err)
+ goto request_done;
+
+ /* fall back to legacy interrupts */
+ igc_reset_interrupt_capability(adapter);
+ adapter->flags &= ~IGC_FLAG_HAS_MSI;
+ }
+
+ err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
+ netdev->name, adapter);
+
+ if (err)
+ dev_err(&pdev->dev, "Error %d getting interrupt\n",
+ err);
+
+request_done:
+ return err;
+}
+
+static void igc_write_itr(struct igc_q_vector *q_vector)
+{
+ u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
+
+ if (!q_vector->set_itr)
+ return;
+
+ if (!itr_val)
+ itr_val = IGC_ITR_VAL_MASK;
+
+ itr_val |= IGC_EITR_CNT_IGNR;
+
+ writel(itr_val, q_vector->itr_register);
+ q_vector->set_itr = 0;
+}
+
+/**
+ * igc_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int __igc_open(struct net_device *netdev, bool resuming)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct igc_hw *hw = &adapter->hw;
+ int err = 0;
+ int i = 0;
+
+ /* disallow open during test */
+
+ if (test_bit(__IGC_TESTING, &adapter->state)) {
+ WARN_ON(resuming);
+ return -EBUSY;
+ }
+
+ netif_carrier_off(netdev);
+
+ /* allocate transmit descriptors */
+ err = igc_setup_all_tx_resources(adapter);
+ if (err)
+ goto err_setup_tx;
+
+ /* allocate receive descriptors */
+ err = igc_setup_all_rx_resources(adapter);
+ if (err)
+ goto err_setup_rx;
+
+ igc_power_up_link(adapter);
+
+ igc_configure(adapter);
+
+ err = igc_request_irq(adapter);
+ if (err)
+ goto err_req_irq;
+
+ /* Notify the stack of the actual queue counts. */
+ netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+ if (err)
+ goto err_set_queues;
+
+ err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+
+ clear_bit(__IGC_DOWN, &adapter->state);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ napi_enable(&adapter->q_vector[i]->napi);
+
+ /* Clear any pending interrupts. */
+ rd32(IGC_ICR);
+ igc_irq_enable(adapter);
+
+ netif_tx_start_all_queues(netdev);
+
+ /* start the watchdog. */
+ hw->mac.get_link_status = 1;
+ schedule_work(&adapter->watchdog_task);
+
+ return IGC_SUCCESS;
+
+err_set_queues:
+ igc_free_irq(adapter);
+err_req_irq:
+ igc_release_hw_control(adapter);
+ igc_power_down_link(adapter);
+ igc_free_all_rx_resources(adapter);
+err_setup_rx:
+ igc_free_all_tx_resources(adapter);
+err_setup_tx:
+ igc_reset(adapter);
+
+ return err;
+}
+
+static int igc_open(struct net_device *netdev)
+{
+ return __igc_open(netdev, false);
+}
+
+/**
+ * igc_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the driver's control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int __igc_close(struct net_device *netdev, bool suspending)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
+
+ igc_down(adapter);
+
+ igc_release_hw_control(adapter);
+
+ igc_free_irq(adapter);
+
+ igc_free_all_tx_resources(adapter);
+ igc_free_all_rx_resources(adapter);
+
+ return 0;
+}
+
+static int igc_close(struct net_device *netdev)
+{
+ if (netif_device_present(netdev) || netdev->dismantle)
+ return __igc_close(netdev, false);
+ return 0;
+}
+
+static const struct net_device_ops igc_netdev_ops = {
+ .ndo_open = igc_open,
+ .ndo_stop = igc_close,
+ .ndo_start_xmit = igc_xmit_frame,
+ .ndo_set_mac_address = igc_set_mac,
+ .ndo_change_mtu = igc_change_mtu,
+ .ndo_get_stats = igc_get_stats,
+ .ndo_do_ioctl = igc_ioctl,
+};
+
+/* PCIe configuration access */
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+ struct igc_adapter *adapter = hw->back;
+
+ pci_read_config_word(adapter->pdev, reg, value);
+}
+
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+ struct igc_adapter *adapter = hw->back;
+
+ pci_write_config_word(adapter->pdev, reg, *value);
+}
+
+s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+ struct igc_adapter *adapter = hw->back;
+ u16 cap_offset;
+
+ cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+ if (!cap_offset)
+ return -IGC_ERR_CONFIG;
+
+ pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+
+ return IGC_SUCCESS;
+}
+
+s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+ struct igc_adapter *adapter = hw->back;
+ u16 cap_offset;
+
+ cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+ if (!cap_offset)
+ return -IGC_ERR_CONFIG;
+
+ pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+
+ return IGC_SUCCESS;
+}
+
+u32 igc_rd32(struct igc_hw *hw, u32 reg)
+{
+ struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
+ u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+ u32 value = 0;
+
+ if (IGC_REMOVED(hw_addr))
+ return ~value;
+
+ value = readl(&hw_addr[reg]);
+
+ /* reads should not return all F's */
+ if (!(~value) && (!reg || !(~readl(hw_addr)))) {
+ struct net_device *netdev = igc->netdev;
+
+ hw->hw_addr = NULL;
+ netif_device_detach(netdev);
+ netdev_err(netdev, "PCIe link lost, device now detached\n");
+ }
+
+ return value;
+}
+
+/**
+ * igc_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igc_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igc_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring the adapter private structure,
+ * and a hardware reset occur.
+ */
+static int igc_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct igc_adapter *adapter;
+ struct net_device *netdev;
+ struct igc_hw *hw;
+ const struct igc_info *ei = igc_info_tbl[ent->driver_data];
+ int err, pci_using_dac;
+
+ err = pci_enable_device_mem(pdev);
+ if (err)
+ return err;
+
+ pci_using_dac = 0;
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+ if (!err) {
+ err = dma_set_coherent_mask(&pdev->dev,
+ DMA_BIT_MASK(64));
+ if (!err)
+ pci_using_dac = 1;
+ } else {
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ err = dma_set_coherent_mask(&pdev->dev,
+ DMA_BIT_MASK(32));
+ if (err) {
+ IGC_ERR("Wrong DMA configuration, aborting\n");
+ goto err_dma;
+ }
+ }
+ }
+
+ err = pci_request_selected_regions(pdev,
+ pci_select_bars(pdev,
+ IORESOURCE_MEM),
+ igc_driver_name);
+ if (err)
+ goto err_pci_reg;
+
+ pci_enable_pcie_error_reporting(pdev);
+
+ pci_set_master(pdev);
+
+ err = -ENOMEM;
+ netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
+ IGC_MAX_TX_QUEUES);
+
+ if (!netdev)
+ goto err_alloc_etherdev;
+
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ pci_set_drvdata(pdev, netdev);
+ adapter = netdev_priv(netdev);
+ adapter->netdev = netdev;
+ adapter->pdev = pdev;
+ hw = &adapter->hw;
+ hw->back = adapter;
+ adapter->port_num = hw->bus.func;
+ adapter->msg_enable = GENMASK(debug - 1, 0);
+
+ err = pci_save_state(pdev);
+ if (err)
+ goto err_ioremap;
+
+ err = -EIO;
+ adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!adapter->io_addr)
+ goto err_ioremap;
+
+ /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
+ hw->hw_addr = adapter->io_addr;
+
+ netdev->netdev_ops = &igc_netdev_ops;
+
+ netdev->watchdog_timeo = 5 * HZ;
+
+ netdev->mem_start = pci_resource_start(pdev, 0);
+ netdev->mem_end = pci_resource_end(pdev, 0);
+
+ /* PCI config space info */
+ hw->vendor_id = pdev->vendor;
+ hw->device_id = pdev->device;
+ hw->revision_id = pdev->revision;
+ hw->subsystem_vendor_id = pdev->subsystem_vendor;
+ hw->subsystem_device_id = pdev->subsystem_device;
+
+ /* Copy the default MAC and PHY function pointers */
+ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
+ memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
+
+ /* Initialize skew-specific constants */
+ err = ei->get_invariants(hw);
+ if (err)
+ goto err_sw_init;
+
+ /* setup the private structure */
+ err = igc_sw_init(adapter);
+ if (err)
+ goto err_sw_init;
+
+ /* MTU range: 68 - 9216 */
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
+ /* before reading the NVM, reset the controller to put the device in a
+ * known good starting state
+ */
+ hw->mac.ops.reset_hw(hw);
+
+ if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
+ /* copy the MAC address out of the NVM */
+ if (hw->mac.ops.read_mac_addr(hw))
+ dev_err(&pdev->dev, "NVM Read Error\n");
+ }
+
+ memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
+
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+ dev_err(&pdev->dev, "Invalid MAC Address\n");
+ err = -EIO;
+ goto err_eeprom;
+ }
+
+ /* configure RXPBSIZE and TXPBSIZE */
+ wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
+ wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+
+ timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
+ timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
+
+ INIT_WORK(&adapter->reset_task, igc_reset_task);
+ INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
+
+ /* Initialize link properties that are user-changeable */
+ adapter->fc_autoneg = true;
+ hw->mac.autoneg = true;
+ hw->phy.autoneg_advertised = 0xaf;
+
+ hw->fc.requested_mode = igc_fc_default;
+ hw->fc.current_mode = igc_fc_default;
+
+ /* reset the hardware with the new settings */
+ igc_reset(adapter);
+
+ /* let the f/w know that the h/w is now under the control of the
+ * driver.
+ */
+ igc_get_hw_control(adapter);
+
+ strncpy(netdev->name, "eth%d", IFNAMSIZ);
+ err = register_netdev(netdev);
+ if (err)
+ goto err_register;
+
+ /* carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+
+ /* Check if Media Autosense is enabled */
+ adapter->ei = *ei;
+
+ /* print pcie link status and MAC address */
+ pcie_print_link_status(pdev);
+ netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
+
+ return 0;
+
+err_register:
+ igc_release_hw_control(adapter);
+err_eeprom:
+ if (!igc_check_reset_block(hw))
+ igc_reset_phy(hw);
+err_sw_init:
+ igc_clear_interrupt_scheme(adapter);
+ iounmap(adapter->io_addr);
+err_ioremap:
+ free_netdev(netdev);
+err_alloc_etherdev:
+ pci_release_selected_regions(pdev,
+ pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
+ * igc_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igc_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ */
+static void igc_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ set_bit(__IGC_DOWN, &adapter->state);
+
+ del_timer_sync(&adapter->watchdog_timer);
+ del_timer_sync(&adapter->phy_info_timer);
+
+ cancel_work_sync(&adapter->reset_task);
+ cancel_work_sync(&adapter->watchdog_task);
+
+ /* Release control of h/w to f/w. If f/w is AMT enabled, this
+ * would have already happened in close and is redundant.
+ */
+ igc_release_hw_control(adapter);
+ unregister_netdev(netdev);
+
+ igc_clear_interrupt_scheme(adapter);
+ pci_iounmap(pdev, adapter->io_addr);
+ pci_release_mem_regions(pdev);
+
+ kfree(adapter->mac_table);
+ kfree(adapter->shadow_vfta);
+ free_netdev(netdev);
+
+ pci_disable_pcie_error_reporting(pdev);
+
+ pci_disable_device(pdev);
+}
+
+static struct pci_driver igc_driver = {
+ .name = igc_driver_name,
+ .id_table = igc_pci_tbl,
+ .probe = igc_probe,
+ .remove = igc_remove,
+};
+
+static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+ const u32 max_rss_queues)
+{
+ /* Determine if we need to pair queues. */
+ /* If rss_queues > half of max_rss_queues, pair the queues in
+ * order to conserve interrupts due to limited supply.
+ */
+ if (adapter->rss_queues > (max_rss_queues / 2))
+ adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+ else
+ adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
+}
+
+static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+{
+ unsigned int max_rss_queues;
+
+ /* Determine the maximum number of RSS queues supported. */
+ max_rss_queues = IGC_MAX_RX_QUEUES;
+
+ return max_rss_queues;
+}
+
+static void igc_init_queue_configuration(struct igc_adapter *adapter)
+{
+ u32 max_rss_queues;
+
+ max_rss_queues = igc_get_max_rss_queues(adapter);
+ adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
+
+ igc_set_flag_queue_pairs(adapter, max_rss_queues);
+}
+
+/**
+ * igc_sw_init - Initialize general software structures (struct igc_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igc_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ */
+static int igc_sw_init(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct igc_hw *hw = &adapter->hw;
+
+ int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+ /* set default ring sizes */
+ adapter->tx_ring_count = IGC_DEFAULT_TXD;
+ adapter->rx_ring_count = IGC_DEFAULT_RXD;
+
+ /* set default ITR values */
+ adapter->rx_itr_setting = IGC_DEFAULT_ITR;
+ adapter->tx_itr_setting = IGC_DEFAULT_ITR;
+
+ /* set default work limits */
+ adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
+
+ /* adjust max frame to be at least the size of a standard frame */
+ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+ VLAN_HLEN;
+ adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+
+ spin_lock_init(&adapter->nfc_lock);
+ spin_lock_init(&adapter->stats64_lock);
+ /* Assume MSI-X interrupts, will be checked during IRQ allocation */
+ adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+ adapter->mac_table = kzalloc(size, GFP_ATOMIC);
+ if (!adapter->mac_table)
+ return -ENOMEM;
+
+ igc_init_queue_configuration(adapter);
+
+ /* This call may decrease the number of queues */
+ if (igc_init_interrupt_scheme(adapter, true)) {
+ dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+ return -ENOMEM;
+ }
+
+ /* Explicitly disable IRQ since the NIC can be in any state. */
+ igc_irq_disable(adapter);
+
+ set_bit(__IGC_DOWN, &adapter->state);
+
+ return 0;
+}
+
+/**
+ * igc_get_hw_dev - return device
+ * @hw: pointer to hardware structure
+ *
+ * used by hardware layer to print debugging information
+ */
+struct net_device *igc_get_hw_dev(struct igc_hw *hw)
+{
+ struct igc_adapter *adapter = hw->back;
+
+ return adapter->netdev;
+}
+
+/**
+ * igc_init_module - Driver Registration Routine
+ *
+ * igc_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init igc_init_module(void)
+{
+ int ret;
+
+ pr_info("%s - version %s\n",
+ igc_driver_string, igc_driver_version);
+
+ pr_info("%s\n", igc_copyright);
+
+ ret = pci_register_driver(&igc_driver);
+ return ret;
+}
+
+module_init(igc_init_module);
+
+/**
+ * igc_exit_module - Driver Exit Cleanup Routine
+ *
+ * igc_exit_module is called just before the driver is removed
+ * from memory.
+ */
+static void __exit igc_exit_module(void)
+{
+ pci_unregister_driver(&igc_driver);
+}
+
+module_exit(igc_exit_module);
+/* igc_main.c */
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c
new file mode 100644
index 000000000000..58f81aba0144
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include "igc_mac.h"
+#include "igc_nvm.h"
+
+/**
+ * igc_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ * @hw: pointer to the HW structure
+ * @ee_reg: EEPROM flag for polling
+ *
+ * Polls the EEPROM status bit for either read or write completion based
+ * upon the value of 'ee_reg'.
+ */
+static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg)
+{
+ s32 ret_val = -IGC_ERR_NVM;
+ u32 attempts = 100000;
+ u32 i, reg = 0;
+
+ for (i = 0; i < attempts; i++) {
+ if (ee_reg == IGC_NVM_POLL_READ)
+ reg = rd32(IGC_EERD);
+ else
+ reg = rd32(IGC_EEWR);
+
+ if (reg & IGC_NVM_RW_REG_DONE) {
+ ret_val = 0;
+ break;
+ }
+
+ udelay(5);
+ }
+
+ return ret_val;
+}
+
+/**
+ * igc_acquire_nvm - Generic request for access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+s32 igc_acquire_nvm(struct igc_hw *hw)
+{
+ s32 timeout = IGC_NVM_GRANT_ATTEMPTS;
+ u32 eecd = rd32(IGC_EECD);
+ s32 ret_val = 0;
+
+ wr32(IGC_EECD, eecd | IGC_EECD_REQ);
+ eecd = rd32(IGC_EECD);
+
+ while (timeout) {
+ if (eecd & IGC_EECD_GNT)
+ break;
+ udelay(5);
+ eecd = rd32(IGC_EECD);
+ timeout--;
+ }
+
+ if (!timeout) {
+ eecd &= ~IGC_EECD_REQ;
+ wr32(IGC_EECD, eecd);
+ hw_dbg("Could not acquire NVM grant\n");
+ ret_val = -IGC_ERR_NVM;
+ }
+
+ return ret_val;
+}
+
+/**
+ * igc_release_nvm - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ */
+void igc_release_nvm(struct igc_hw *hw)
+{
+ u32 eecd;
+
+ eecd = rd32(IGC_EECD);
+ eecd &= ~IGC_EECD_REQ;
+ wr32(IGC_EECD, eecd);
+}
+
+/**
+ * igc_read_nvm_eerd - Reads EEPROM using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the EERD register.
+ */
+s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data)
+{
+ struct igc_nvm_info *nvm = &hw->nvm;
+ u32 i, eerd = 0;
+ s32 ret_val = 0;
+
+ /* A check for invalid values: offset too large, too many words,
+ * and not enough words.
+ */
+ if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+ words == 0) {
+ hw_dbg("nvm parameter(s) out of bounds\n");
+ ret_val = -IGC_ERR_NVM;
+ goto out;
+ }
+
+ for (i = 0; i < words; i++) {
+ eerd = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) +
+ IGC_NVM_RW_REG_START;
+
+ wr32(IGC_EERD, eerd);
+ ret_val = igc_poll_eerd_eewr_done(hw, IGC_NVM_POLL_READ);
+ if (ret_val)
+ break;
+
+ data[i] = (rd32(IGC_EERD) >> IGC_NVM_RW_REG_DATA);
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_read_mac_addr - Read device MAC address
+ * @hw: pointer to the HW structure
+ */
+s32 igc_read_mac_addr(struct igc_hw *hw)
+{
+ u32 rar_high;
+ u32 rar_low;
+ u16 i;
+
+ rar_high = rd32(IGC_RAH(0));
+ rar_low = rd32(IGC_RAL(0));
+
+ for (i = 0; i < IGC_RAL_MAC_ADDR_LEN; i++)
+ hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8));
+
+ for (i = 0; i < IGC_RAH_MAC_ADDR_LEN; i++)
+ hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8));
+
+ for (i = 0; i < ETH_ALEN; i++)
+ hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+ return 0;
+}
+
+/**
+ * igc_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ */
+s32 igc_validate_nvm_checksum(struct igc_hw *hw)
+{
+ u16 checksum = 0;
+ u16 i, nvm_data;
+ s32 ret_val = 0;
+
+ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ hw_dbg("NVM Read Error\n");
+ goto out;
+ }
+ checksum += nvm_data;
+ }
+
+ if (checksum != (u16)NVM_SUM) {
+ hw_dbg("NVM Checksum Invalid\n");
+ ret_val = -IGC_ERR_NVM;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_update_nvm_checksum - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum. Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM.
+ */
+s32 igc_update_nvm_checksum(struct igc_hw *hw)
+{
+ u16 checksum = 0;
+ u16 i, nvm_data;
+ s32 ret_val;
+
+ for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+ if (ret_val) {
+ hw_dbg("NVM Read Error while updating checksum.\n");
+ goto out;
+ }
+ checksum += nvm_data;
+ }
+ checksum = (u16)NVM_SUM - checksum;
+ ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
+ if (ret_val)
+ hw_dbg("NVM Write Error while updating checksum.\n");
+
+out:
+ return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h
new file mode 100644
index 000000000000..f9fc2e9cfb03
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_NVM_H_
+#define _IGC_NVM_H_
+
+s32 igc_acquire_nvm(struct igc_hw *hw);
+void igc_release_nvm(struct igc_hw *hw);
+s32 igc_read_mac_addr(struct igc_hw *hw);
+s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data);
+s32 igc_validate_nvm_checksum(struct igc_hw *hw);
+s32 igc_update_nvm_checksum(struct igc_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
new file mode 100644
index 000000000000..38e43e6fc1c7
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -0,0 +1,791 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include "igc_phy.h"
+
+/* forward declaration */
+static s32 igc_phy_setup_autoneg(struct igc_hw *hw);
+static s32 igc_wait_autoneg(struct igc_hw *hw);
+
+/**
+ * igc_check_reset_block - Check if PHY reset is blocked
+ * @hw: pointer to the HW structure
+ *
+ * Read the PHY management control register and check whether a PHY reset
+ * is blocked. If a reset is not blocked return 0, otherwise
+ * return IGC_ERR_BLK_PHY_RESET (12).
+ */
+s32 igc_check_reset_block(struct igc_hw *hw)
+{
+ u32 manc;
+
+ manc = rd32(IGC_MANC);
+
+ return (manc & IGC_MANC_BLK_PHY_RST_ON_IDE) ?
+ IGC_ERR_BLK_PHY_RESET : 0;
+}
+
+/**
+ * igc_get_phy_id - Retrieve the PHY ID and revision
+ * @hw: pointer to the HW structure
+ *
+ * Reads the PHY registers and stores the PHY ID and possibly the PHY
+ * revision in the hardware structure.
+ */
+s32 igc_get_phy_id(struct igc_hw *hw)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ s32 ret_val = 0;
+ u16 phy_id;
+
+ ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
+ if (ret_val)
+ goto out;
+
+ phy->id = (u32)(phy_id << 16);
+ usleep_range(200, 500);
+ ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
+ if (ret_val)
+ goto out;
+
+ phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_phy_has_link - Polls PHY for link
+ * @hw: pointer to the HW structure
+ * @iterations: number of times to poll for link
+ * @usec_interval: delay between polling attempts
+ * @success: pointer to whether polling was successful or not
+ *
+ * Polls the PHY status register for link, 'iterations' number of times.
+ */
+s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations,
+ u32 usec_interval, bool *success)
+{
+ u16 i, phy_status;
+ s32 ret_val = 0;
+
+ for (i = 0; i < iterations; i++) {
+ /* Some PHYs require the PHY_STATUS register to be read
+ * twice due to the link bit being sticky. No harm doing
+ * it across the board.
+ */
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val && usec_interval > 0) {
+ /* If the first read fails, another entity may have
+ * ownership of the resources, wait and try again to
+ * see if they have relinquished the resources yet.
+ */
+ if (usec_interval >= 1000)
+ mdelay(usec_interval / 1000);
+ else
+ udelay(usec_interval);
+ }
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ break;
+ if (phy_status & MII_SR_LINK_STATUS)
+ break;
+ if (usec_interval >= 1000)
+ mdelay(usec_interval / 1000);
+ else
+ udelay(usec_interval);
+ }
+
+ *success = (i < iterations) ? true : false;
+
+ return ret_val;
+}
+
+/**
+ * igc_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, restore the link to previous settings.
+ */
+void igc_power_up_phy_copper(struct igc_hw *hw)
+{
+ u16 mii_reg = 0;
+
+ /* The PHY will retain its settings across a power down/up cycle */
+ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+ mii_reg &= ~MII_CR_POWER_DOWN;
+ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * igc_power_down_phy_copper - Power down copper PHY
+ * @hw: pointer to the HW structure
+ *
+ * Power down PHY to save power when interface is down and wake on lan
+ * is not enabled.
+ */
+void igc_power_down_phy_copper(struct igc_hw *hw)
+{
+ u16 mii_reg = 0;
+
+ /* The PHY will retain its settings across a power down/up cycle */
+ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+ mii_reg |= MII_CR_POWER_DOWN;
+
+ /* Temporary workaround - should be removed when PHY will implement
+ * IEEE registers as properly
+ */
+ /* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+ usleep_range(1000, 2000);
+}
+
+/**
+ * igc_check_downshift - Checks whether a downshift in speed occurred
+ * @hw: pointer to the HW structure
+ *
+ * Success returns 0, Failure returns 1
+ *
+ * A downshift is detected by querying the PHY link health.
+ */
+s32 igc_check_downshift(struct igc_hw *hw)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ u16 phy_data, offset, mask;
+ s32 ret_val;
+
+ switch (phy->type) {
+ case igc_phy_i225:
+ default:
+ /* speed downshift not supported */
+ phy->speed_downgraded = false;
+ ret_val = 0;
+ goto out;
+ }
+
+ ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+ if (!ret_val)
+ phy->speed_downgraded = (phy_data & mask) ? true : false;
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_phy_hw_reset - PHY hardware reset
+ * @hw: pointer to the HW structure
+ *
+ * Verify the reset block is not blocking us from resetting. Acquire
+ * semaphore (if necessary) and read/set/write the device control reset
+ * bit in the PHY. Wait the appropriate delay time for the device to
+ * reset and release the semaphore (if necessary).
+ */
+s32 igc_phy_hw_reset(struct igc_hw *hw)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ s32 ret_val;
+ u32 ctrl;
+
+ ret_val = igc_check_reset_block(hw);
+ if (ret_val) {
+ ret_val = 0;
+ goto out;
+ }
+
+ ret_val = phy->ops.acquire(hw);
+ if (ret_val)
+ goto out;
+
+ ctrl = rd32(IGC_CTRL);
+ wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST);
+ wrfl();
+
+ udelay(phy->reset_delay_us);
+
+ wr32(IGC_CTRL, ctrl);
+ wrfl();
+
+ usleep_range(1500, 2000);
+
+ phy->ops.release(hw);
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_copper_link_autoneg - Setup/Enable autoneg for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Performs initial bounds checking on autoneg advertisement parameter, then
+ * configure to advertise the full capability. Setup the PHY to autoneg
+ * and restart the negotiation process between the link partner. If
+ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ */
+static s32 igc_copper_link_autoneg(struct igc_hw *hw)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ u16 phy_ctrl;
+ s32 ret_val;
+
+ /* Perform some bounds checking on the autoneg advertisement
+ * parameter.
+ */
+ phy->autoneg_advertised &= phy->autoneg_mask;
+
+ /* If autoneg_advertised is zero, we assume it was not defaulted
+ * by the calling code so we set to advertise full capability.
+ */
+ if (phy->autoneg_advertised == 0)
+ phy->autoneg_advertised = phy->autoneg_mask;
+
+ hw_dbg("Reconfiguring auto-neg advertisement params\n");
+ ret_val = igc_phy_setup_autoneg(hw);
+ if (ret_val) {
+ hw_dbg("Error Setting up Auto-Negotiation\n");
+ goto out;
+ }
+ hw_dbg("Restarting Auto-Neg\n");
+
+ /* Restart auto-negotiation by setting the Auto Neg Enable bit and
+ * the Auto Neg Restart bit in the PHY control register.
+ */
+ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+ if (ret_val)
+ goto out;
+
+ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+ if (ret_val)
+ goto out;
+
+ /* Does the user want to wait for Auto-Neg to complete here, or
+ * check at a later time (for example, callback routine).
+ */
+ if (phy->autoneg_wait_to_complete) {
+ ret_val = igc_wait_autoneg(hw);
+ if (ret_val) {
+ hw_dbg("Error while waiting for autoneg to complete\n");
+ goto out;
+ }
+ }
+
+ hw->mac.get_link_status = true;
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_wait_autoneg - Wait for auto-neg completion
+ * @hw: pointer to the HW structure
+ *
+ * Waits for auto-negotiation to complete or for the auto-negotiation time
+ * limit to expire, which ever happens first.
+ */
+static s32 igc_wait_autoneg(struct igc_hw *hw)
+{
+ u16 i, phy_status;
+ s32 ret_val = 0;
+
+ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ break;
+ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+ if (ret_val)
+ break;
+ if (phy_status & MII_SR_AUTONEG_COMPLETE)
+ break;
+ msleep(100);
+ }
+
+ /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+ * has completed.
+ */
+ return ret_val;
+}
+
+/**
+ * igc_phy_setup_autoneg - Configure PHY for auto-negotiation
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MII auto-neg advertisement register and/or the 1000T control
+ * register and if the PHY is already setup for auto-negotiation, then
+ * return successful. Otherwise, setup advertisement and flow control to
+ * the appropriate values for the wanted auto-negotiation.
+ */
+static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ u16 aneg_multigbt_an_ctrl = 0;
+ u16 mii_1000t_ctrl_reg = 0;
+ u16 mii_autoneg_adv_reg;
+ s32 ret_val;
+
+ phy->autoneg_advertised &= phy->autoneg_mask;
+
+ /* Read the MII Auto-Neg Advertisement Register (Address 4). */
+ ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+ if (ret_val)
+ return ret_val;
+
+ if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+ /* Read the MII 1000Base-T Control Register (Address 9). */
+ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
+ &mii_1000t_ctrl_reg);
+ if (ret_val)
+ return ret_val;
+ }
+
+ if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
+ hw->phy.id == I225_I_PHY_ID) {
+ /* Read the MULTI GBT AN Control Register - reg 7.32 */
+ ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK <<
+ MMD_DEVADDR_SHIFT) |
+ ANEG_MULTIGBT_AN_CTRL,
+ &aneg_multigbt_an_ctrl);
+
+ if (ret_val)
+ return ret_val;
+ }
+
+ /* Need to parse both autoneg_advertised and fc and set up
+ * the appropriate PHY registers. First we will parse for
+ * autoneg_advertised software override. Since we can advertise
+ * a plethora of combinations, we need to check each bit
+ * individually.
+ */
+
+ /* First we clear all the 10/100 mb speed bits in the Auto-Neg
+ * Advertisement Register (Address 4) and the 1000 mb speed bits in
+ * the 1000Base-T Control Register (Address 9).
+ */
+ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
+ NWAY_AR_100TX_HD_CAPS |
+ NWAY_AR_10T_FD_CAPS |
+ NWAY_AR_10T_HD_CAPS);
+ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
+
+ hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+ /* Do we want to advertise 10 Mb Half Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+ hw_dbg("Advertise 10mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+ }
+
+ /* Do we want to advertise 10 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+ hw_dbg("Advertise 10mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Half Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+ hw_dbg("Advertise 100mb Half duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+ }
+
+ /* Do we want to advertise 100 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+ hw_dbg("Advertise 100mb Full duplex\n");
+ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+ }
+
+ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+ if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+ hw_dbg("Advertise 1000mb Half duplex request denied!\n");
+
+ /* Do we want to advertise 1000 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+ hw_dbg("Advertise 1000mb Full duplex\n");
+ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+ }
+
+ /* We do not allow the Phy to advertise 2500 Mb Half Duplex */
+ if (phy->autoneg_advertised & ADVERTISE_2500_HALF)
+ hw_dbg("Advertise 2500mb Half duplex request denied!\n");
+
+ /* Do we want to advertise 2500 Mb Full Duplex? */
+ if (phy->autoneg_advertised & ADVERTISE_2500_FULL) {
+ hw_dbg("Advertise 2500mb Full duplex\n");
+ aneg_multigbt_an_ctrl |= CR_2500T_FD_CAPS;
+ } else {
+ aneg_multigbt_an_ctrl &= ~CR_2500T_FD_CAPS;
+ }
+
+ /* Check for a software override of the flow control settings, and
+ * setup the PHY advertisement registers accordingly. If
+ * auto-negotiation is enabled, then software will have to set the
+ * "PAUSE" bits to the correct value in the Auto-Negotiation
+ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
+ * negotiation.
+ *
+ * The possible values of the "fc" parameter are:
+ * 0: Flow control is completely disabled
+ * 1: Rx flow control is enabled (we can receive pause frames
+ * but not send pause frames).
+ * 2: Tx flow control is enabled (we can send pause frames
+ * but we do not support receiving pause frames).
+ * 3: Both Rx and Tx flow control (symmetric) are enabled.
+ * other: No software override. The flow control configuration
+ * in the EEPROM is used.
+ */
+ switch (hw->fc.current_mode) {
+ case igc_fc_none:
+ /* Flow control (Rx & Tx) is completely disabled by a
+ * software over-ride.
+ */
+ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case igc_fc_rx_pause:
+ /* Rx Flow control is enabled, and Tx Flow control is
+ * disabled, by a software over-ride.
+ *
+ * Since there really isn't a way to advertise that we are
+ * capable of Rx Pause ONLY, we will advertise that we
+ * support both symmetric and asymmetric Rx PAUSE. Later
+ * (in igc_config_fc_after_link_up) we will disable the
+ * hw's ability to send PAUSE frames.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ case igc_fc_tx_pause:
+ /* Tx Flow control is enabled, and Rx Flow control is
+ * disabled, by a software over-ride.
+ */
+ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+ break;
+ case igc_fc_full:
+ /* Flow control (both Rx and Tx) is enabled by a software
+ * over-ride.
+ */
+ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+ break;
+ default:
+ hw_dbg("Flow control param set incorrectly\n");
+ return -IGC_ERR_CONFIG;
+ }
+
+ ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+ if (ret_val)
+ return ret_val;
+
+ hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+ if (phy->autoneg_mask & ADVERTISE_1000_FULL)
+ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
+ mii_1000t_ctrl_reg);
+
+ if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
+ hw->phy.id == I225_I_PHY_ID)
+ ret_val = phy->ops.write_reg(hw,
+ (STANDARD_AN_REG_MASK <<
+ MMD_DEVADDR_SHIFT) |
+ ANEG_MULTIGBT_AN_CTRL,
+ aneg_multigbt_an_ctrl);
+
+ return ret_val;
+}
+
+/**
+ * igc_setup_copper_link - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Calls the appropriate function to configure the link for auto-neg or forced
+ * speed and duplex. Then we check for link, once link is established calls
+ * to configure collision distance and flow control are called. If link is
+ * not established, we return -IGC_ERR_PHY (-2).
+ */
+s32 igc_setup_copper_link(struct igc_hw *hw)
+{
+ s32 ret_val = 0;
+ bool link;
+
+ if (hw->mac.autoneg) {
+ /* Setup autoneg and flow control advertisement and perform
+ * autonegotiation.
+ */
+ ret_val = igc_copper_link_autoneg(hw);
+ if (ret_val)
+ goto out;
+ } else {
+ /* PHY will be set to 10H, 10F, 100H or 100F
+ * depending on user settings.
+ */
+ hw_dbg("Forcing Speed and Duplex\n");
+ ret_val = hw->phy.ops.force_speed_duplex(hw);
+ if (ret_val) {
+ hw_dbg("Error Forcing Speed and Duplex\n");
+ goto out;
+ }
+ }
+
+ /* Check link status. Wait up to 100 microseconds for link to become
+ * valid.
+ */
+ ret_val = igc_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link);
+ if (ret_val)
+ goto out;
+
+ if (link) {
+ hw_dbg("Valid link established!!!\n");
+ igc_config_collision_dist(hw);
+ ret_val = igc_config_fc_after_link_up(hw);
+ } else {
+ hw_dbg("Unable to establish link!!!\n");
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_read_phy_reg_mdic - Read MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the MDI control register in the PHY at offset and stores the
+ * information read to data.
+ */
+static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ u32 i, mdic = 0;
+ s32 ret_val = 0;
+
+ if (offset > MAX_PHY_REG_ADDRESS) {
+ hw_dbg("PHY Address %d is out of range\n", offset);
+ ret_val = -IGC_ERR_PARAM;
+ goto out;
+ }
+
+ /* Set up Op-code, Phy Address, and register offset in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to retrieve the desired data.
+ */
+ mdic = ((offset << IGC_MDIC_REG_SHIFT) |
+ (phy->addr << IGC_MDIC_PHY_SHIFT) |
+ (IGC_MDIC_OP_READ));
+
+ wr32(IGC_MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+ usleep_range(500, 1000);
+ mdic = rd32(IGC_MDIC);
+ if (mdic & IGC_MDIC_READY)
+ break;
+ }
+ if (!(mdic & IGC_MDIC_READY)) {
+ hw_dbg("MDI Read did not complete\n");
+ ret_val = -IGC_ERR_PHY;
+ goto out;
+ }
+ if (mdic & IGC_MDIC_ERROR) {
+ hw_dbg("MDI Error\n");
+ ret_val = -IGC_ERR_PHY;
+ goto out;
+ }
+ *data = (u16)mdic;
+
+out:
+ return ret_val;
+}
+
+/**
+ * igc_write_phy_reg_mdic - Write MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write to register at offset
+ *
+ * Writes data to MDI control register in the PHY at offset.
+ */
+static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
+{
+ struct igc_phy_info *phy = &hw->phy;
+ u32 i, mdic = 0;
+ s32 ret_val = 0;
+
+ if (offset > MAX_PHY_REG_ADDRESS) {
+ hw_dbg("PHY Address %d is out of range\n", offset);
+ ret_val = -IGC_ERR_PARAM;
+ goto out;
+ }
+
+ /* Set up Op-code, Phy Address, and register offset in the MDI
+ * Control register. The MAC will take care of interfacing with the
+ * PHY to write the desired data.
+ */
+ mdic = (((u32)data) |
+ (offset << IGC_MDIC_REG_SHIFT) |
+ (phy->addr << IGC_MDIC_PHY_SHIFT) |
+ (IGC_MDIC_OP_WRITE));
+
+ wr32(IGC_MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+ usleep_range(500, 1000);
+ mdic = rd32(IGC_MDIC);
+ if (mdic & IGC_MDIC_READY)
+ break;
+ }
+ if (!(mdic & IGC_MDIC_READY)) {
+ hw_dbg("MDI Write did not complete\n");
+ ret_val = -IGC_ERR_PHY;
+ goto out;
+ }
+ if (mdic & IGC_MDIC_ERROR) {
+ hw_dbg("MDI Error\n");
+ ret_val = -IGC_ERR_PHY;
+ goto out;
+ }
+
+out:
+ return ret_val;
+}
+
+/**
+ * __igc_access_xmdio_reg - Read/write XMDIO register
+ * @hw: pointer to the HW structure
+ * @address: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: pointer to value to read/write from/to the XMDIO address
+ * @read: boolean flag to indicate read or write
+ */
+static s32 __igc_access_xmdio_reg(struct igc_hw *hw, u16 address,
+ u8 dev_addr, u16 *data, bool read)
+{
+ s32 ret_val;
+
+ ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, dev_addr);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, address);
+ if (ret_val)
+ return ret_val;
+
+ ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, IGC_MMDAC_FUNC_DATA |
+ dev_addr);
+ if (ret_val)
+ return ret_val;
+
+ if (read)
+ ret_val = hw->phy.ops.read_reg(hw, IGC_MMDAAD, data);
+ else
+ ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, *data);
+ if (ret_val)
+ return ret_val;
+
+ /* Recalibrate the device back to 0 */
+ ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, 0);
+ if (ret_val)
+ return ret_val;
+
+ return ret_val;
+}
+
+/**
+ * igc_read_xmdio_reg - Read XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be read from the EMI address
+ */
+static s32 igc_read_xmdio_reg(struct igc_hw *hw, u16 addr,
+ u8 dev_addr, u16 *data)
+{
+ return __igc_access_xmdio_reg(hw, addr, dev_addr, data, true);
+}
+
+/**
+ * igc_write_xmdio_reg - Write XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be written to the XMDIO address
+ */
+static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr,
+ u8 dev_addr, u16 data)
+{
+ return __igc_access_xmdio_reg(hw, addr, dev_addr, &data, false);
+}
+
+/**
+ * igc_write_phy_reg_gpy - Write GPY PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore, if necessary, then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ */
+s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data)
+{
+ u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT;
+ s32 ret_val;
+
+ offset = offset & GPY_REG_MASK;
+
+ if (!dev_addr) {
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+ ret_val = igc_write_phy_reg_mdic(hw, offset, data);
+ if (ret_val)
+ return ret_val;
+ hw->phy.ops.release(hw);
+ } else {
+ ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr,
+ data);
+ }
+
+ return ret_val;
+}
+
+/**
+ * igc_read_phy_reg_gpy - Read GPY PHY register
+ * @hw: pointer to the HW structure
+ * @offset: lower half is register offset to read to
+ * upper half is MMD to use.
+ * @data: data to read at register offset
+ *
+ * Acquires semaphore, if necessary, then reads the data in the PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ */
+s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data)
+{
+ u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT;
+ s32 ret_val;
+
+ offset = offset & GPY_REG_MASK;
+
+ if (!dev_addr) {
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+ ret_val = igc_read_phy_reg_mdic(hw, offset, data);
+ if (ret_val)
+ return ret_val;
+ hw->phy.ops.release(hw);
+ } else {
+ ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr,
+ data);
+ }
+
+ return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h
new file mode 100644
index 000000000000..25cba33de7e2
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_phy.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_PHY_H_
+#define _IGC_PHY_H_
+
+#include "igc_mac.h"
+
+s32 igc_check_reset_block(struct igc_hw *hw);
+s32 igc_phy_hw_reset(struct igc_hw *hw);
+s32 igc_get_phy_id(struct igc_hw *hw);
+s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations,
+ u32 usec_interval, bool *success);
+s32 igc_check_downshift(struct igc_hw *hw);
+s32 igc_setup_copper_link(struct igc_hw *hw);
+void igc_power_up_phy_copper(struct igc_hw *hw);
+void igc_power_down_phy_copper(struct igc_hw *hw);
+s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data);
+s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
new file mode 100644
index 000000000000..a1bd3216c906
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Intel Corporation */
+
+#ifndef _IGC_REGS_H_
+#define _IGC_REGS_H_
+
+/* General Register Descriptions */
+#define IGC_CTRL 0x00000 /* Device Control - RW */
+#define IGC_STATUS 0x00008 /* Device Status - RO */
+#define IGC_EECD 0x00010 /* EEPROM/Flash Control - RW */
+#define IGC_CTRL_EXT 0x00018 /* Extended Device Control - RW */
+#define IGC_MDIC 0x00020 /* MDI Control - RW */
+#define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */
+#define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */
+
+/* Internal Packet Buffer Size Registers */
+#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */
+#define IGC_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */
+
+/* NVM Register Descriptions */
+#define IGC_EERD 0x12014 /* EEprom mode read - RW */
+#define IGC_EEWR 0x12018 /* EEprom mode write - RW */
+
+/* Flow Control Register Descriptions */
+#define IGC_FCAL 0x00028 /* FC Address Low - RW */
+#define IGC_FCAH 0x0002C /* FC Address High - RW */
+#define IGC_FCT 0x00030 /* FC Type - RW */
+#define IGC_FCTTV 0x00170 /* FC Transmit Timer - RW */
+#define IGC_FCRTL 0x02160 /* FC Receive Threshold Low - RW */
+#define IGC_FCRTH 0x02168 /* FC Receive Threshold High - RW */
+#define IGC_FCRTV 0x02460 /* FC Refresh Timer Value - RW */
+#define IGC_FCSTS 0x02464 /* FC Status - RO */
+
+/* PCIe Register Description */
+#define IGC_GCR 0x05B00 /* PCIe control- RW */
+
+/* Semaphore registers */
+#define IGC_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */
+#define IGC_SWSM 0x05B50 /* SW Semaphore */
+#define IGC_FWSM 0x05B54 /* FW Semaphore */
+
+/* Function Active and Power State to MNG */
+#define IGC_FACTPS 0x05B30
+
+/* Interrupt Register Description */
+#define IGC_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */
+#define IGC_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */
+#define IGC_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */
+#define IGC_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */
+#define IGC_EIAM 0x01530 /* Ext. Interrupt Auto Mask - RW */
+#define IGC_ICR 0x01500 /* Intr Cause Read - RC/W1C */
+#define IGC_ICS 0x01504 /* Intr Cause Set - WO */
+#define IGC_IMS 0x01508 /* Intr Mask Set/Read - RW */
+#define IGC_IMC 0x0150C /* Intr Mask Clear - WO */
+#define IGC_IAM 0x01510 /* Intr Ack Auto Mask- RW */
+/* Intr Throttle - RW */
+#define IGC_EITR(_n) (0x01680 + (0x4 * (_n)))
+/* Interrupt Vector Allocation - RW */
+#define IGC_IVAR0 0x01700
+#define IGC_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */
+#define IGC_GPIE 0x01514 /* General Purpose Intr Enable - RW */
+
+/* Interrupt Cause */
+#define IGC_ICRXPTC 0x04104 /* Rx Packet Timer Expire Count */
+#define IGC_ICRXATC 0x04108 /* Rx Absolute Timer Expire Count */
+#define IGC_ICTXPTC 0x0410C /* Tx Packet Timer Expire Count */
+#define IGC_ICTXATC 0x04110 /* Tx Absolute Timer Expire Count */
+#define IGC_ICTXQEC 0x04118 /* Tx Queue Empty Count */
+#define IGC_ICTXQMTC 0x0411C /* Tx Queue Min Threshold Count */
+#define IGC_ICRXDMTC 0x04120 /* Rx Descriptor Min Threshold Count */
+#define IGC_ICRXOC 0x04124 /* Receiver Overrun Count */
+
+#define IGC_CBTMPC 0x0402C /* Circuit Breaker TX Packet Count */
+#define IGC_HTDPMC 0x0403C /* Host Transmit Discarded Packets */
+#define IGC_CBRMPC 0x040FC /* Circuit Breaker RX Packet Count */
+#define IGC_RPTHC 0x04104 /* Rx Packets To Host */
+#define IGC_HGPTC 0x04118 /* Host Good Packets TX Count */
+#define IGC_HTCBDPC 0x04124 /* Host TX Circ.Breaker Drop Count */
+
+/* MSI-X Table Register Descriptions */
+#define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */
+
+/* Receive Register Descriptions */
+#define IGC_RCTL 0x00100 /* Rx Control - RW */
+#define IGC_SRRCTL(_n) (0x0C00C + ((_n) * 0x40))
+#define IGC_PSRTYPE(_i) (0x05480 + ((_i) * 4))
+#define IGC_RDBAL(_n) (0x0C000 + ((_n) * 0x40))
+#define IGC_RDBAH(_n) (0x0C004 + ((_n) * 0x40))
+#define IGC_RDLEN(_n) (0x0C008 + ((_n) * 0x40))
+#define IGC_RDH(_n) (0x0C010 + ((_n) * 0x40))
+#define IGC_RDT(_n) (0x0C018 + ((_n) * 0x40))
+#define IGC_RXDCTL(_n) (0x0C028 + ((_n) * 0x40))
+#define IGC_RQDPC(_n) (0x0C030 + ((_n) * 0x40))
+#define IGC_RXCSUM 0x05000 /* Rx Checksum Control - RW */
+#define IGC_RLPML 0x05004 /* Rx Long Packet Max Length */
+#define IGC_RFCTL 0x05008 /* Receive Filter Control*/
+#define IGC_MTA 0x05200 /* Multicast Table Array - RW Array */
+#define IGC_UTA 0x0A000 /* Unicast Table Array - RW */
+#define IGC_RAL(_n) (0x05400 + ((_n) * 0x08))
+#define IGC_RAH(_n) (0x05404 + ((_n) * 0x08))
+
+/* Transmit Register Descriptions */
+#define IGC_TCTL 0x00400 /* Tx Control - RW */
+#define IGC_TIPG 0x00410 /* Tx Inter-packet gap - RW */
+#define IGC_TDBAL(_n) (0x0E000 + ((_n) * 0x40))
+#define IGC_TDBAH(_n) (0x0E004 + ((_n) * 0x40))
+#define IGC_TDLEN(_n) (0x0E008 + ((_n) * 0x40))
+#define IGC_TDH(_n) (0x0E010 + ((_n) * 0x40))
+#define IGC_TDT(_n) (0x0E018 + ((_n) * 0x40))
+#define IGC_TXDCTL(_n) (0x0E028 + ((_n) * 0x40))
+
+/* MMD Register Descriptions */
+#define IGC_MMDAC 13 /* MMD Access Control */
+#define IGC_MMDAAD 14 /* MMD Access Address/Data */
+
+/* Good transmitted packets counter registers */
+#define IGC_PQGPTC(_n) (0x010014 + (0x100 * (_n)))
+
+/* Statistics Register Descriptions */
+#define IGC_CRCERRS 0x04000 /* CRC Error Count - R/clr */
+#define IGC_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */
+#define IGC_SYMERRS 0x04008 /* Symbol Error Count - R/clr */
+#define IGC_RXERRC 0x0400C /* Receive Error Count - R/clr */
+#define IGC_MPC 0x04010 /* Missed Packet Count - R/clr */
+#define IGC_SCC 0x04014 /* Single Collision Count - R/clr */
+#define IGC_ECOL 0x04018 /* Excessive Collision Count - R/clr */
+#define IGC_MCC 0x0401C /* Multiple Collision Count - R/clr */
+#define IGC_LATECOL 0x04020 /* Late Collision Count - R/clr */
+#define IGC_COLC 0x04028 /* Collision Count - R/clr */
+#define IGC_DC 0x04030 /* Defer Count - R/clr */
+#define IGC_TNCRS 0x04034 /* Tx-No CRS - R/clr */
+#define IGC_SEC 0x04038 /* Sequence Error Count - R/clr */
+#define IGC_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */
+#define IGC_RLEC 0x04040 /* Receive Length Error Count - R/clr */
+#define IGC_XONRXC 0x04048 /* XON Rx Count - R/clr */
+#define IGC_XONTXC 0x0404C /* XON Tx Count - R/clr */
+#define IGC_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */
+#define IGC_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */
+#define IGC_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */
+#define IGC_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */
+#define IGC_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */
+#define IGC_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */
+#define IGC_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */
+#define IGC_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */
+#define IGC_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */
+#define IGC_GPRC 0x04074 /* Good Packets Rx Count - R/clr */
+#define IGC_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */
+#define IGC_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */
+#define IGC_GPTC 0x04080 /* Good Packets Tx Count - R/clr */
+#define IGC_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */
+#define IGC_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */
+#define IGC_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */
+#define IGC_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */
+#define IGC_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */
+#define IGC_RUC 0x040A4 /* Rx Undersize Count - R/clr */
+#define IGC_RFC 0x040A8 /* Rx Fragment Count - R/clr */
+#define IGC_ROC 0x040AC /* Rx Oversize Count - R/clr */
+#define IGC_RJC 0x040B0 /* Rx Jabber Count - R/clr */
+#define IGC_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */
+#define IGC_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */
+#define IGC_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */
+#define IGC_TORL 0x040C0 /* Total Octets Rx Low - R/clr */
+#define IGC_TORH 0x040C4 /* Total Octets Rx High - R/clr */
+#define IGC_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */
+#define IGC_TOTH 0x040CC /* Total Octets Tx High - R/clr */
+#define IGC_TPR 0x040D0 /* Total Packets Rx - R/clr */
+#define IGC_TPT 0x040D4 /* Total Packets Tx - R/clr */
+#define IGC_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */
+#define IGC_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */
+#define IGC_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */
+#define IGC_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */
+#define IGC_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */
+#define IGC_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */
+#define IGC_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */
+#define IGC_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */
+#define IGC_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */
+#define IGC_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */
+#define IGC_IAC 0x04100 /* Interrupt Assertion Count */
+#define IGC_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */
+#define IGC_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */
+#define IGC_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */
+#define IGC_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */
+#define IGC_RPTHC 0x04104 /* Rx Packets To Host */
+#define IGC_HGPTC 0x04118 /* Host Good Packets Tx Count */
+#define IGC_RXDMTC 0x04120 /* Rx Descriptor Minimum Threshold Count */
+#define IGC_HGORCL 0x04128 /* Host Good Octets Received Count Low */
+#define IGC_HGORCH 0x0412C /* Host Good Octets Received Count High */
+#define IGC_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */
+#define IGC_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */
+#define IGC_LENERRS 0x04138 /* Length Errors Count */
+#define IGC_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */
+#define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */
+
+/* Management registers */
+#define IGC_MANC 0x05820 /* Management Control - RW */
+
+/* Shadow Ram Write Register - RW */
+#define IGC_SRWR 0x12018
+
+/* forward declaration */
+struct igc_hw;
+u32 igc_rd32(struct igc_hw *hw, u32 reg);
+
+/* write operations, indexed using DWORDS */
+#define wr32(reg, val) \
+do { \
+ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+ if (!IGC_REMOVED(hw_addr)) \
+ writel((val), &hw_addr[(reg)]); \
+} while (0)
+
+#define rd32(reg) (igc_rd32(hw, reg))
+
+#define wrfl() ((void)rd32(IGC_STATUS))
+
+#define array_wr32(reg, offset, value) \
+ wr32((reg) + ((offset) << 2), (value))
+
+#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+
+#endif
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index eaac26424486..45b108f8f955 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o
obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o
octeontx2_mbox-y := mbox.o
-octeontx2_af-y := cgx.o rvu.o rvu_cgx.o
+octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 5328ecc8cea2..352501b54aee 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -29,6 +29,7 @@
* @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion
* @cmd_lock: Lock to serialize the command interface
* @resp: command response
+ * @link_info: link related information
* @event_cb: callback for linkchange events
* @cmd_pend: flag set before new command is started
* flag cleared after command response is received
@@ -40,6 +41,7 @@ struct lmac {
wait_queue_head_t wq_cmd_cmplt;
struct mutex cmd_lock;
u64 resp;
+ struct cgx_link_user_info link_info;
struct cgx_event_cb event_cb;
bool cmd_pend;
struct cgx *cgx;
@@ -58,6 +60,12 @@ struct cgx {
static LIST_HEAD(cgx_list);
+/* Convert firmware speed encoding to user format(Mbps) */
+static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX];
+
+/* Convert firmware lmac type encoding to string */
+static char *cgx_lmactype_string[LMAC_MODE_MAX];
+
/* Supported devices */
static const struct pci_device_id cgx_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) },
@@ -119,6 +127,177 @@ void *cgx_get_pdata(int cgx_id)
}
EXPORT_SYMBOL(cgx_get_pdata);
+/* Ensure the required lock for event queue(where asynchronous events are
+ * posted) is acquired before calling this API. Else an asynchronous event(with
+ * latest link status) can reach the destination before this function returns
+ * and could make the link status appear wrong.
+ */
+int cgx_get_link_info(void *cgxd, int lmac_id,
+ struct cgx_link_user_info *linfo)
+{
+ struct lmac *lmac = lmac_pdata(lmac_id, cgxd);
+
+ if (!lmac)
+ return -ENODEV;
+
+ *linfo = lmac->link_info;
+ return 0;
+}
+EXPORT_SYMBOL(cgx_get_link_info);
+
+static u64 mac2u64 (u8 *mac_addr)
+{
+ u64 mac = 0;
+ int index;
+
+ for (index = ETH_ALEN - 1; index >= 0; index--)
+ mac |= ((u64)*mac_addr++) << (8 * index);
+ return mac;
+}
+
+int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr)
+{
+ struct cgx *cgx_dev = cgx_get_pdata(cgx_id);
+ u64 cfg;
+
+ /* copy 6bytes from macaddr */
+ /* memcpy(&cfg, mac_addr, 6); */
+
+ cfg = mac2u64 (mac_addr);
+
+ cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (lmac_id * 0x8)),
+ cfg | CGX_DMAC_CAM_ADDR_ENABLE | ((u64)lmac_id << 49));
+
+ cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0);
+ cfg |= CGX_DMAC_CTL0_CAM_ENABLE;
+ cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg);
+
+ return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_addr_set);
+
+u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id)
+{
+ struct cgx *cgx_dev = cgx_get_pdata(cgx_id);
+ u64 cfg;
+
+ cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8);
+ return cfg & CGX_RX_DMAC_ADR_MASK;
+}
+EXPORT_SYMBOL(cgx_lmac_addr_get);
+
+static inline u8 cgx_get_lmac_type(struct cgx *cgx, int lmac_id)
+{
+ u64 cfg;
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
+ return (cfg >> CGX_LMAC_TYPE_SHIFT) & CGX_LMAC_TYPE_MASK;
+}
+
+/* Configure CGX LMAC in internal loopback mode */
+int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable)
+{
+ struct cgx *cgx = cgxd;
+ u8 lmac_type;
+ u64 cfg;
+
+ if (!cgx || lmac_id >= cgx->lmac_count)
+ return -ENODEV;
+
+ lmac_type = cgx_get_lmac_type(cgx, lmac_id);
+ if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) {
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL);
+ if (enable)
+ cfg |= CGXX_GMP_PCS_MRX_CTL_LBK;
+ else
+ cfg &= ~CGXX_GMP_PCS_MRX_CTL_LBK;
+ cgx_write(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL, cfg);
+ } else {
+ cfg = cgx_read(cgx, lmac_id, CGXX_SPUX_CONTROL1);
+ if (enable)
+ cfg |= CGXX_SPUX_CONTROL1_LBK;
+ else
+ cfg &= ~CGXX_SPUX_CONTROL1_LBK;
+ cgx_write(cgx, lmac_id, CGXX_SPUX_CONTROL1, cfg);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_internal_loopback);
+
+void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable)
+{
+ struct cgx *cgx = cgx_get_pdata(cgx_id);
+ u64 cfg = 0;
+
+ if (!cgx)
+ return;
+
+ if (enable) {
+ /* Enable promiscuous mode on LMAC */
+ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0);
+ cfg &= ~(CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE);
+ cfg |= CGX_DMAC_BCAST_MODE;
+ cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg);
+
+ cfg = cgx_read(cgx, 0,
+ (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8));
+ cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE;
+ cgx_write(cgx, 0,
+ (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg);
+ } else {
+ /* Disable promiscuous mode */
+ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0);
+ cfg |= CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE;
+ cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg);
+ cfg = cgx_read(cgx, 0,
+ (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8));
+ cfg |= CGX_DMAC_CAM_ADDR_ENABLE;
+ cgx_write(cgx, 0,
+ (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg);
+ }
+}
+EXPORT_SYMBOL(cgx_lmac_promisc_config);
+
+int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat)
+{
+ struct cgx *cgx = cgxd;
+
+ if (!cgx || lmac_id >= cgx->lmac_count)
+ return -ENODEV;
+ *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8));
+ return 0;
+}
+EXPORT_SYMBOL(cgx_get_rx_stats);
+
+int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat)
+{
+ struct cgx *cgx = cgxd;
+
+ if (!cgx || lmac_id >= cgx->lmac_count)
+ return -ENODEV;
+ *tx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (idx * 8));
+ return 0;
+}
+EXPORT_SYMBOL(cgx_get_tx_stats);
+
+int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable)
+{
+ struct cgx *cgx = cgxd;
+ u64 cfg;
+
+ if (!cgx || lmac_id >= cgx->lmac_count)
+ return -ENODEV;
+
+ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG);
+ if (enable)
+ cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN;
+ else
+ cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN);
+ cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg);
+ return 0;
+}
+EXPORT_SYMBOL(cgx_lmac_rx_tx_enable);
+
/* CGX Firmware interface low level support */
static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac)
{
@@ -191,36 +370,79 @@ static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp,
return err;
}
+static inline void cgx_link_usertable_init(void)
+{
+ cgx_speed_mbps[CGX_LINK_NONE] = 0;
+ cgx_speed_mbps[CGX_LINK_10M] = 10;
+ cgx_speed_mbps[CGX_LINK_100M] = 100;
+ cgx_speed_mbps[CGX_LINK_1G] = 1000;
+ cgx_speed_mbps[CGX_LINK_2HG] = 2500;
+ cgx_speed_mbps[CGX_LINK_5G] = 5000;
+ cgx_speed_mbps[CGX_LINK_10G] = 10000;
+ cgx_speed_mbps[CGX_LINK_20G] = 20000;
+ cgx_speed_mbps[CGX_LINK_25G] = 25000;
+ cgx_speed_mbps[CGX_LINK_40G] = 40000;
+ cgx_speed_mbps[CGX_LINK_50G] = 50000;
+ cgx_speed_mbps[CGX_LINK_100G] = 100000;
+
+ cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII";
+ cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI";
+ cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI";
+ cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R";
+ cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R";
+ cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII";
+ cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R";
+ cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R";
+ cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R";
+ cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII";
+}
+
+static inline void link_status_user_format(u64 lstat,
+ struct cgx_link_user_info *linfo,
+ struct cgx *cgx, u8 lmac_id)
+{
+ char *lmac_string;
+
+ linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
+ linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
+ linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)];
+ linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id);
+ lmac_string = cgx_lmactype_string[linfo->lmac_type_id];
+ strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1);
+}
+
/* Hardware event handlers */
static inline void cgx_link_change_handler(u64 lstat,
struct lmac *lmac)
{
+ struct cgx_link_user_info *linfo;
struct cgx *cgx = lmac->cgx;
struct cgx_link_event event;
struct device *dev;
+ int err_type;
dev = &cgx->pdev->dev;
- event.lstat.link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
- event.lstat.full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
- event.lstat.speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat);
- event.lstat.err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat);
+ link_status_user_format(lstat, &event.link_uinfo, cgx, lmac->lmac_id);
+ err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat);
event.cgx_id = cgx->cgx_id;
event.lmac_id = lmac->lmac_id;
+ /* update the local copy of link status */
+ lmac->link_info = event.link_uinfo;
+ linfo = &lmac->link_info;
+
if (!lmac->event_cb.notify_link_chg) {
dev_dbg(dev, "cgx port %d:%d Link change handler null",
cgx->cgx_id, lmac->lmac_id);
- if (event.lstat.err_type != CGX_ERR_NONE) {
+ if (err_type != CGX_ERR_NONE) {
dev_err(dev, "cgx port %d:%d Link error %d\n",
- cgx->cgx_id, lmac->lmac_id,
- event.lstat.err_type);
+ cgx->cgx_id, lmac->lmac_id, err_type);
}
- dev_info(dev, "cgx port %d:%d Link status %s, speed %x\n",
+ dev_info(dev, "cgx port %d:%d Link is %s %d Mbps\n",
cgx->cgx_id, lmac->lmac_id,
- event.lstat.link_up ? "UP" : "DOWN",
- event.lstat.speed);
+ linfo->link_up ? "UP" : "DOWN", linfo->speed);
return;
}
@@ -448,6 +670,8 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
list_add(&cgx->cgx_list, &cgx_list);
cgx->cgx_id = cgx_get_cgx_cnt() - 1;
+ cgx_link_usertable_init();
+
err = cgx_lmac_init(cgx);
if (err)
goto err_release_lmac;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index a2a7a6d72d32..ada25ed0765b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -11,6 +11,7 @@
#ifndef CGX_H
#define CGX_H
+#include "mbox.h"
#include "cgx_fw_if.h"
/* PCI device IDs */
@@ -24,14 +25,35 @@
#define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX)
/* Registers */
+#define CGXX_CMRX_CFG 0x00
+#define CMR_EN BIT_ULL(55)
+#define DATA_PKT_TX_EN BIT_ULL(53)
+#define DATA_PKT_RX_EN BIT_ULL(54)
+#define CGX_LMAC_TYPE_SHIFT 40
+#define CGX_LMAC_TYPE_MASK 0xF
#define CGXX_CMRX_INT 0x040
#define FW_CGX_INT BIT_ULL(1)
#define CGXX_CMRX_INT_ENA_W1S 0x058
#define CGXX_CMRX_RX_ID_MAP 0x060
+#define CGXX_CMRX_RX_STAT0 0x070
#define CGXX_CMRX_RX_LMACS 0x128
+#define CGXX_CMRX_RX_DMAC_CTL0 0x1F8
+#define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3)
+#define CGX_DMAC_CAM_ACCEPT BIT_ULL(3)
+#define CGX_DMAC_MCAST_MODE BIT_ULL(1)
+#define CGX_DMAC_BCAST_MODE BIT_ULL(0)
+#define CGXX_CMRX_RX_DMAC_CAM0 0x200
+#define CGX_DMAC_CAM_ADDR_ENABLE BIT_ULL(48)
+#define CGXX_CMRX_RX_DMAC_CAM1 0x400
+#define CGX_RX_DMAC_ADR_MASK GENMASK_ULL(47, 0)
+#define CGXX_CMRX_TX_STAT0 0x700
#define CGXX_SCRATCH0_REG 0x1050
#define CGXX_SCRATCH1_REG 0x1058
#define CGX_CONST 0x2000
+#define CGXX_SPUX_CONTROL1 0x10000
+#define CGXX_SPUX_CONTROL1_LBK BIT_ULL(14)
+#define CGXX_GMP_PCS_MRX_CTL 0x30000
+#define CGXX_GMP_PCS_MRX_CTL_LBK BIT_ULL(14)
#define CGX_COMMAND_REG CGXX_SCRATCH1_REG
#define CGX_EVENT_REG CGXX_SCRATCH0_REG
@@ -40,8 +62,22 @@
#define CGX_NVEC 37
#define CGX_LMAC_FWI 0
+enum LMAC_TYPE {
+ LMAC_MODE_SGMII = 0,
+ LMAC_MODE_XAUI = 1,
+ LMAC_MODE_RXAUI = 2,
+ LMAC_MODE_10G_R = 3,
+ LMAC_MODE_40G_R = 4,
+ LMAC_MODE_QSGMII = 6,
+ LMAC_MODE_25G_R = 7,
+ LMAC_MODE_50G_R = 8,
+ LMAC_MODE_100G_R = 9,
+ LMAC_MODE_USXGMII = 10,
+ LMAC_MODE_MAX,
+};
+
struct cgx_link_event {
- struct cgx_lnk_sts lstat;
+ struct cgx_link_user_info link_uinfo;
u8 cgx_id;
u8 lmac_id;
};
@@ -62,4 +98,13 @@ int cgx_get_cgx_cnt(void);
int cgx_get_lmac_cnt(void *cgxd);
void *cgx_get_pdata(int cgx_id);
int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id);
+int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat);
+int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat);
+int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable);
+int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr);
+u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id);
+void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable);
+int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable);
+int cgx_get_link_info(void *cgxd, int lmac_id,
+ struct cgx_link_user_info *linfo);
#endif /* CGX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
new file mode 100644
index 000000000000..28eb691185f4
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "rvu_struct.h"
+
+#define OTX2_ALIGN 128 /* Align to cacheline */
+
+#define Q_SIZE_16 0ULL /* 16 entries */
+#define Q_SIZE_64 1ULL /* 64 entries */
+#define Q_SIZE_256 2ULL
+#define Q_SIZE_1K 3ULL
+#define Q_SIZE_4K 4ULL
+#define Q_SIZE_16K 5ULL
+#define Q_SIZE_64K 6ULL
+#define Q_SIZE_256K 7ULL
+#define Q_SIZE_1M 8ULL /* Million entries */
+#define Q_SIZE_MIN Q_SIZE_16
+#define Q_SIZE_MAX Q_SIZE_1M
+
+#define Q_COUNT(x) (16ULL << (2 * x))
+#define Q_SIZE(x, n) ((ilog2(x) - (n)) / 2)
+
+/* Admin queue info */
+
+/* Since we intend to add only one instruction at a time,
+ * keep queue size to it's minimum.
+ */
+#define AQ_SIZE Q_SIZE_16
+/* HW head & tail pointer mask */
+#define AQ_PTR_MASK 0xFFFFF
+
+struct qmem {
+ void *base;
+ dma_addr_t iova;
+ int alloc_sz;
+ u8 entry_sz;
+ u8 align;
+ u32 qsize;
+};
+
+static inline int qmem_alloc(struct device *dev, struct qmem **q,
+ int qsize, int entry_sz)
+{
+ struct qmem *qmem;
+ int aligned_addr;
+
+ if (!qsize)
+ return -EINVAL;
+
+ *q = devm_kzalloc(dev, sizeof(*qmem), GFP_KERNEL);
+ if (!*q)
+ return -ENOMEM;
+ qmem = *q;
+
+ qmem->entry_sz = entry_sz;
+ qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN;
+ qmem->base = dma_zalloc_coherent(dev, qmem->alloc_sz,
+ &qmem->iova, GFP_KERNEL);
+ if (!qmem->base)
+ return -ENOMEM;
+
+ qmem->qsize = qsize;
+
+ aligned_addr = ALIGN((u64)qmem->iova, OTX2_ALIGN);
+ qmem->align = (aligned_addr - qmem->iova);
+ qmem->base += qmem->align;
+ qmem->iova += qmem->align;
+ return 0;
+}
+
+static inline void qmem_free(struct device *dev, struct qmem *qmem)
+{
+ if (!qmem)
+ return;
+
+ if (qmem->base)
+ dma_free_coherent(dev, qmem->alloc_sz,
+ qmem->base - qmem->align,
+ qmem->iova - qmem->align);
+ devm_kfree(dev, qmem);
+}
+
+struct admin_queue {
+ struct qmem *inst;
+ struct qmem *res;
+ spinlock_t lock; /* Serialize inst enqueue from PFs */
+};
+
+/* NPA aura count */
+enum npa_aura_sz {
+ NPA_AURA_SZ_0,
+ NPA_AURA_SZ_128,
+ NPA_AURA_SZ_256,
+ NPA_AURA_SZ_512,
+ NPA_AURA_SZ_1K,
+ NPA_AURA_SZ_2K,
+ NPA_AURA_SZ_4K,
+ NPA_AURA_SZ_8K,
+ NPA_AURA_SZ_16K,
+ NPA_AURA_SZ_32K,
+ NPA_AURA_SZ_64K,
+ NPA_AURA_SZ_128K,
+ NPA_AURA_SZ_256K,
+ NPA_AURA_SZ_512K,
+ NPA_AURA_SZ_1M,
+ NPA_AURA_SZ_MAX,
+};
+
+#define NPA_AURA_COUNT(x) (1ULL << ((x) + 6))
+
+/* NPA AQ result structure for init/read/write of aura HW contexts */
+struct npa_aq_aura_res {
+ struct npa_aq_res_s res;
+ struct npa_aura_s aura_ctx;
+ struct npa_aura_s ctx_mask;
+};
+
+/* NPA AQ result structure for init/read/write of pool HW contexts */
+struct npa_aq_pool_res {
+ struct npa_aq_res_s res;
+ struct npa_pool_s pool_ctx;
+ struct npa_pool_s ctx_mask;
+};
+
+/* NIX Transmit schedulers */
+enum nix_scheduler {
+ NIX_TXSCH_LVL_SMQ = 0x0,
+ NIX_TXSCH_LVL_MDQ = 0x0,
+ NIX_TXSCH_LVL_TL4 = 0x1,
+ NIX_TXSCH_LVL_TL3 = 0x2,
+ NIX_TXSCH_LVL_TL2 = 0x3,
+ NIX_TXSCH_LVL_TL1 = 0x4,
+ NIX_TXSCH_LVL_CNT = 0x5,
+};
+
+/* NIX LSO format indices.
+ * As of now TSO is the only one using, so statically assigning indices.
+ */
+#define NIX_LSO_FORMAT_IDX_TSOV4 0
+#define NIX_LSO_FORMAT_IDX_TSOV6 1
+
+/* RSS info */
+#define MAX_RSS_GROUPS 8
+/* Group 0 has to be used in default pkt forwarding MCAM entries
+ * reserved for NIXLFs. Groups 1-7 can be used for RSS for ntuple
+ * filters.
+ */
+#define DEFAULT_RSS_CONTEXT_GROUP 0
+#define MAX_RSS_INDIR_TBL_SIZE 256 /* 1 << Max adder bits */
+
+#endif /* COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index bedf0ee62450..c339024d04e0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -124,21 +124,50 @@ M(ATTACH_RESOURCES, 0x002, rsrc_attach, msg_rsp) \
M(DETACH_RESOURCES, 0x003, rsrc_detach, msg_rsp) \
M(MSIX_OFFSET, 0x004, msg_req, msix_offset_rsp) \
/* CGX mbox IDs (range 0x200 - 0x3FF) */ \
+M(CGX_START_RXTX, 0x200, msg_req, msg_rsp) \
+M(CGX_STOP_RXTX, 0x201, msg_req, msg_rsp) \
+M(CGX_STATS, 0x202, msg_req, cgx_stats_rsp) \
+M(CGX_MAC_ADDR_SET, 0x203, cgx_mac_addr_set_or_get, \
+ cgx_mac_addr_set_or_get) \
+M(CGX_MAC_ADDR_GET, 0x204, cgx_mac_addr_set_or_get, \
+ cgx_mac_addr_set_or_get) \
+M(CGX_PROMISC_ENABLE, 0x205, msg_req, msg_rsp) \
+M(CGX_PROMISC_DISABLE, 0x206, msg_req, msg_rsp) \
+M(CGX_START_LINKEVENTS, 0x207, msg_req, msg_rsp) \
+M(CGX_STOP_LINKEVENTS, 0x208, msg_req, msg_rsp) \
+M(CGX_GET_LINKINFO, 0x209, msg_req, cgx_link_info_msg) \
+M(CGX_INTLBK_ENABLE, 0x20A, msg_req, msg_rsp) \
+M(CGX_INTLBK_DISABLE, 0x20B, msg_req, msg_rsp) \
/* NPA mbox IDs (range 0x400 - 0x5FF) */ \
+M(NPA_LF_ALLOC, 0x400, npa_lf_alloc_req, npa_lf_alloc_rsp) \
+M(NPA_LF_FREE, 0x401, msg_req, msg_rsp) \
+M(NPA_AQ_ENQ, 0x402, npa_aq_enq_req, npa_aq_enq_rsp) \
+M(NPA_HWCTX_DISABLE, 0x403, hwctx_disable_req, msg_rsp) \
/* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \
/* TIM mbox IDs (range 0x800 - 0x9FF) */ \
/* CPT mbox IDs (range 0xA00 - 0xBFF) */ \
/* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \
/* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \
+M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc_req, nix_lf_alloc_rsp) \
+M(NIX_LF_FREE, 0x8001, msg_req, msg_rsp) \
+M(NIX_AQ_ENQ, 0x8002, nix_aq_enq_req, nix_aq_enq_rsp) \
+M(NIX_HWCTX_DISABLE, 0x8003, hwctx_disable_req, msg_rsp)
+
+/* Messages initiated by AF (range 0xC00 - 0xDFF) */
+#define MBOX_UP_CGX_MESSAGES \
+M(CGX_LINK_EVENT, 0xC00, cgx_link_info_msg, msg_rsp)
enum {
#define M(_name, _id, _1, _2) MBOX_MSG_ ## _name = _id,
MBOX_MESSAGES
+MBOX_UP_CGX_MESSAGES
#undef M
};
/* Mailbox message formats */
+#define RVU_DEFAULT_PF_FUNC 0xFFFF
+
/* Generic request msg used for those mbox messages which
* don't send any data in the request.
*/
@@ -208,4 +237,181 @@ struct msix_offset_rsp {
u16 cptlf_msixoff[MAX_RVU_BLKLF_CNT];
};
+/* CGX mbox message formats */
+
+struct cgx_stats_rsp {
+ struct mbox_msghdr hdr;
+#define CGX_RX_STATS_COUNT 13
+#define CGX_TX_STATS_COUNT 18
+ u64 rx_stats[CGX_RX_STATS_COUNT];
+ u64 tx_stats[CGX_TX_STATS_COUNT];
+};
+
+/* Structure for requesting the operation for
+ * setting/getting mac address in the CGX interface
+ */
+struct cgx_mac_addr_set_or_get {
+ struct mbox_msghdr hdr;
+ u8 mac_addr[ETH_ALEN];
+};
+
+struct cgx_link_user_info {
+ uint64_t link_up:1;
+ uint64_t full_duplex:1;
+ uint64_t lmac_type_id:4;
+ uint64_t speed:20; /* speed in Mbps */
+#define LMACTYPE_STR_LEN 16
+ char lmac_type[LMACTYPE_STR_LEN];
+};
+
+struct cgx_link_info_msg {
+ struct mbox_msghdr hdr;
+ struct cgx_link_user_info link_info;
+};
+
+/* NPA mbox message formats */
+
+/* NPA mailbox error codes
+ * Range 301 - 400.
+ */
+enum npa_af_status {
+ NPA_AF_ERR_PARAM = -301,
+ NPA_AF_ERR_AQ_FULL = -302,
+ NPA_AF_ERR_AQ_ENQUEUE = -303,
+ NPA_AF_ERR_AF_LF_INVALID = -304,
+ NPA_AF_ERR_AF_LF_ALLOC = -305,
+ NPA_AF_ERR_LF_RESET = -306,
+};
+
+/* For NPA LF context alloc and init */
+struct npa_lf_alloc_req {
+ struct mbox_msghdr hdr;
+ int node;
+ int aura_sz; /* No of auras */
+ u32 nr_pools; /* No of pools */
+};
+
+struct npa_lf_alloc_rsp {
+ struct mbox_msghdr hdr;
+ u32 stack_pg_ptrs; /* No of ptrs per stack page */
+ u32 stack_pg_bytes; /* Size of stack page */
+ u16 qints; /* NPA_AF_CONST::QINTS */
+};
+
+/* NPA AQ enqueue msg */
+struct npa_aq_enq_req {
+ struct mbox_msghdr hdr;
+ u32 aura_id;
+ u8 ctype;
+ u8 op;
+ union {
+ /* Valid when op == WRITE/INIT and ctype == AURA.
+ * LF fills the pool_id in aura.pool_addr. AF will translate
+ * the pool_id to pool context pointer.
+ */
+ struct npa_aura_s aura;
+ /* Valid when op == WRITE/INIT and ctype == POOL */
+ struct npa_pool_s pool;
+ };
+ /* Mask data when op == WRITE (1=write, 0=don't write) */
+ union {
+ /* Valid when op == WRITE and ctype == AURA */
+ struct npa_aura_s aura_mask;
+ /* Valid when op == WRITE and ctype == POOL */
+ struct npa_pool_s pool_mask;
+ };
+};
+
+struct npa_aq_enq_rsp {
+ struct mbox_msghdr hdr;
+ union {
+ /* Valid when op == READ and ctype == AURA */
+ struct npa_aura_s aura;
+ /* Valid when op == READ and ctype == POOL */
+ struct npa_pool_s pool;
+ };
+};
+
+/* Disable all contexts of type 'ctype' */
+struct hwctx_disable_req {
+ struct mbox_msghdr hdr;
+ u8 ctype;
+};
+
+/* NIX mailbox error codes
+ * Range 401 - 500.
+ */
+enum nix_af_status {
+ NIX_AF_ERR_PARAM = -401,
+ NIX_AF_ERR_AQ_FULL = -402,
+ NIX_AF_ERR_AQ_ENQUEUE = -403,
+ NIX_AF_ERR_AF_LF_INVALID = -404,
+ NIX_AF_ERR_AF_LF_ALLOC = -405,
+ NIX_AF_ERR_TLX_ALLOC_FAIL = -406,
+ NIX_AF_ERR_TLX_INVALID = -407,
+ NIX_AF_ERR_RSS_SIZE_INVALID = -408,
+ NIX_AF_ERR_RSS_GRPS_INVALID = -409,
+ NIX_AF_ERR_FRS_INVALID = -410,
+ NIX_AF_ERR_RX_LINK_INVALID = -411,
+ NIX_AF_INVAL_TXSCHQ_CFG = -412,
+ NIX_AF_SMQ_FLUSH_FAILED = -413,
+ NIX_AF_ERR_LF_RESET = -414,
+};
+
+/* For NIX LF context alloc and init */
+struct nix_lf_alloc_req {
+ struct mbox_msghdr hdr;
+ int node;
+ u32 rq_cnt; /* No of receive queues */
+ u32 sq_cnt; /* No of send queues */
+ u32 cq_cnt; /* No of completion queues */
+ u8 xqe_sz;
+ u16 rss_sz;
+ u8 rss_grps;
+ u16 npa_func;
+ u16 sso_func;
+ u64 rx_cfg; /* See NIX_AF_LF(0..127)_RX_CFG */
+};
+
+struct nix_lf_alloc_rsp {
+ struct mbox_msghdr hdr;
+ u16 sqb_size;
+ u8 lso_tsov4_idx;
+ u8 lso_tsov6_idx;
+ u8 mac_addr[ETH_ALEN];
+};
+
+/* NIX AQ enqueue msg */
+struct nix_aq_enq_req {
+ struct mbox_msghdr hdr;
+ u32 qidx;
+ u8 ctype;
+ u8 op;
+ union {
+ struct nix_rq_ctx_s rq;
+ struct nix_sq_ctx_s sq;
+ struct nix_cq_ctx_s cq;
+ struct nix_rsse_s rss;
+ struct nix_rx_mce_s mce;
+ };
+ union {
+ struct nix_rq_ctx_s rq_mask;
+ struct nix_sq_ctx_s sq_mask;
+ struct nix_cq_ctx_s cq_mask;
+ struct nix_rsse_s rss_mask;
+ struct nix_rx_mce_s mce_mask;
+ };
+};
+
+struct nix_aq_enq_rsp {
+ struct mbox_msghdr hdr;
+ union {
+ struct nix_rq_ctx_s rq;
+ struct nix_sq_ctx_s sq;
+ struct nix_cq_ctx_s cq;
+ struct nix_rsse_s rss;
+ struct nix_rx_mce_s mce;
+ };
+};
+
#endif /* MBOX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index 2033f42c3226..c06cca9c9e1a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -47,18 +47,18 @@ MODULE_DEVICE_TABLE(pci, rvu_id_table);
*/
int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero)
{
+ unsigned long timeout = jiffies + usecs_to_jiffies(100);
void __iomem *reg;
- int timeout = 100;
u64 reg_val;
reg = rvu->afreg_base + ((block << 28) | offset);
- while (timeout) {
+ while (time_before(jiffies, timeout)) {
reg_val = readq(reg);
if (zero && !(reg_val & mask))
return 0;
if (!zero && (reg_val & mask))
return 0;
- usleep_range(1, 2);
+ usleep_range(1, 5);
timeout--;
}
return -EBUSY;
@@ -361,6 +361,19 @@ static void rvu_check_block_implemented(struct rvu *rvu)
}
}
+int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf)
+{
+ int err;
+
+ if (!block->implemented)
+ return 0;
+
+ rvu_write64(rvu, block->addr, block->lfreset_reg, lf | BIT_ULL(12));
+ err = rvu_poll_reg(rvu, block->addr, block->lfreset_reg, BIT_ULL(12),
+ true);
+ return err;
+}
+
static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
{
struct rvu_block *block = &rvu->hw->block[blkaddr];
@@ -552,6 +565,9 @@ static void rvu_free_hw_resources(struct rvu *rvu)
int id, max_msix;
u64 cfg;
+ rvu_npa_freemem(rvu);
+ rvu_nix_freemem(rvu);
+
/* Free block LF bitmaps */
for (id = 0; id < BLK_COUNT; id++) {
block = &hw->block[id];
@@ -755,6 +771,54 @@ init:
rvu_scan_block(rvu, block);
}
+ err = rvu_npa_init(rvu);
+ if (err)
+ return err;
+
+ err = rvu_nix_init(rvu);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/* NPA and NIX admin queue APIs */
+void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq)
+{
+ if (!aq)
+ return;
+
+ qmem_free(rvu->dev, aq->inst);
+ qmem_free(rvu->dev, aq->res);
+ devm_kfree(rvu->dev, aq);
+}
+
+int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue,
+ int qsize, int inst_size, int res_size)
+{
+ struct admin_queue *aq;
+ int err;
+
+ *ad_queue = devm_kzalloc(rvu->dev, sizeof(*aq), GFP_KERNEL);
+ if (!*ad_queue)
+ return -ENOMEM;
+ aq = *ad_queue;
+
+ /* Alloc memory for instructions i.e AQ */
+ err = qmem_alloc(rvu->dev, &aq->inst, qsize, inst_size);
+ if (err) {
+ devm_kfree(rvu->dev, aq);
+ return err;
+ }
+
+ /* Alloc memory for results */
+ err = qmem_alloc(rvu->dev, &aq->res, qsize, res_size);
+ if (err) {
+ rvu_aq_free(rvu, aq);
+ return err;
+ }
+
+ spin_lock_init(&aq->lock);
return 0;
}
@@ -1316,6 +1380,63 @@ static void rvu_mbox_handler(struct work_struct *work)
otx2_mbox_msg_send(mbox, pf);
}
+static void rvu_mbox_up_handler(struct work_struct *work)
+{
+ struct rvu_work *mwork = container_of(work, struct rvu_work, work);
+ struct rvu *rvu = mwork->rvu;
+ struct otx2_mbox_dev *mdev;
+ struct mbox_hdr *rsp_hdr;
+ struct mbox_msghdr *msg;
+ struct otx2_mbox *mbox;
+ int offset, id;
+ u16 pf;
+
+ mbox = &rvu->mbox_up;
+ pf = mwork - rvu->mbox_wrk_up;
+ mdev = &mbox->dev[pf];
+
+ rsp_hdr = mdev->mbase + mbox->rx_start;
+ if (rsp_hdr->num_msgs == 0) {
+ dev_warn(rvu->dev, "mbox up handler: num_msgs = 0\n");
+ return;
+ }
+
+ offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN);
+
+ for (id = 0; id < rsp_hdr->num_msgs; id++) {
+ msg = mdev->mbase + offset;
+
+ if (msg->id >= MBOX_MSG_MAX) {
+ dev_err(rvu->dev,
+ "Mbox msg with unknown ID 0x%x\n", msg->id);
+ goto end;
+ }
+
+ if (msg->sig != OTX2_MBOX_RSP_SIG) {
+ dev_err(rvu->dev,
+ "Mbox msg with wrong signature %x, ID 0x%x\n",
+ msg->sig, msg->id);
+ goto end;
+ }
+
+ switch (msg->id) {
+ case MBOX_MSG_CGX_LINK_EVENT:
+ break;
+ default:
+ if (msg->rc)
+ dev_err(rvu->dev,
+ "Mbox msg response has err %d, ID 0x%x\n",
+ msg->rc, msg->id);
+ break;
+ }
+end:
+ offset = mbox->rx_start + msg->next_msgoff;
+ mdev->msgs_acked++;
+ }
+
+ otx2_mbox_reset(mbox, 0);
+}
+
static int rvu_mbox_init(struct rvu *rvu)
{
struct rvu_hwinfo *hw = rvu->hw;
@@ -1337,6 +1458,13 @@ static int rvu_mbox_init(struct rvu *rvu)
goto exit;
}
+ rvu->mbox_wrk_up = devm_kcalloc(rvu->dev, hw->total_pfs,
+ sizeof(struct rvu_work), GFP_KERNEL);
+ if (!rvu->mbox_wrk_up) {
+ err = -ENOMEM;
+ goto exit;
+ }
+
/* Map mbox region shared with PFs */
bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR);
/* Mailbox is a reserved memory (in RAM) region shared between
@@ -1355,12 +1483,23 @@ static int rvu_mbox_init(struct rvu *rvu)
if (err)
goto exit;
+ err = otx2_mbox_init(&rvu->mbox_up, hwbase, rvu->pdev, rvu->afreg_base,
+ MBOX_DIR_AFPF_UP, hw->total_pfs);
+ if (err)
+ goto exit;
+
for (pf = 0; pf < hw->total_pfs; pf++) {
mwork = &rvu->mbox_wrk[pf];
mwork->rvu = rvu;
INIT_WORK(&mwork->work, rvu_mbox_handler);
}
+ for (pf = 0; pf < hw->total_pfs; pf++) {
+ mwork = &rvu->mbox_wrk_up[pf];
+ mwork->rvu = rvu;
+ INIT_WORK(&mwork->work, rvu_mbox_up_handler);
+ }
+
return 0;
exit:
if (hwbase)
@@ -1381,6 +1520,7 @@ static void rvu_mbox_destroy(struct rvu *rvu)
iounmap((void __iomem *)rvu->mbox.hwbase);
otx2_mbox_destroy(&rvu->mbox);
+ otx2_mbox_destroy(&rvu->mbox_up);
}
static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
@@ -1407,6 +1547,12 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
if (hdr->num_msgs)
queue_work(rvu->mbox_wq,
&rvu->mbox_wrk[pf].work);
+ mbox = &rvu->mbox_up;
+ mdev = &mbox->dev[pf];
+ hdr = mdev->mbase + mbox->rx_start;
+ if (hdr->num_msgs)
+ queue_work(rvu->mbox_wq,
+ &rvu->mbox_wrk_up[pf].work);
}
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index d169fa9eb45e..b48b5af83f1d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -12,6 +12,7 @@
#define RVU_H
#include "rvu_struct.h"
+#include "common.h"
#include "mbox.h"
/* PCI device IDs */
@@ -41,7 +42,8 @@ struct rsrc_bmap {
};
struct rvu_block {
- struct rsrc_bmap lf;
+ struct rsrc_bmap lf;
+ struct admin_queue *aq; /* NIX/NPA AQ */
u16 *fn_map; /* LF to pcifunc mapping */
bool multislot;
bool implemented;
@@ -70,14 +72,50 @@ struct rvu_pfvf {
struct rsrc_bmap msix; /* Bitmap for MSIX vector alloc */
#define MSIX_BLKLF(blkaddr, lf) (((blkaddr) << 8) | ((lf) & 0xFF))
u16 *msix_lfmap; /* Vector to block LF mapping */
+
+ /* NPA contexts */
+ struct qmem *aura_ctx;
+ struct qmem *pool_ctx;
+ struct qmem *npa_qints_ctx;
+ unsigned long *aura_bmap;
+ unsigned long *pool_bmap;
+
+ /* NIX contexts */
+ struct qmem *rq_ctx;
+ struct qmem *sq_ctx;
+ struct qmem *cq_ctx;
+ struct qmem *rss_ctx;
+ struct qmem *cq_ints_ctx;
+ struct qmem *nix_qints_ctx;
+ unsigned long *sq_bmap;
+ unsigned long *rq_bmap;
+ unsigned long *cq_bmap;
+
+ u8 mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */
+};
+
+struct nix_txsch {
+ struct rsrc_bmap schq;
+ u8 lvl;
+ u16 *pfvf_map;
+};
+
+struct nix_hw {
+ struct nix_txsch txsch[NIX_TXSCH_LVL_CNT]; /* Tx schedulers */
};
struct rvu_hwinfo {
u8 total_pfs; /* MAX RVU PFs HW supports */
u16 total_vfs; /* Max RVU VFs HW supports */
u16 max_vfs_per_pf; /* Max VFs that can be attached to a PF */
+ u8 cgx;
+ u8 lmac_per_cgx;
+ u8 cgx_links;
+ u8 lbk_links;
+ u8 sdp_links;
struct rvu_block block[BLK_COUNT]; /* Block info */
+ struct nix_hw *nix0;
};
struct rvu {
@@ -93,6 +131,8 @@ struct rvu {
/* Mbox */
struct otx2_mbox mbox;
struct rvu_work *mbox_wrk;
+ struct otx2_mbox mbox_up;
+ struct rvu_work *mbox_wrk_up;
struct workqueue_struct *mbox_wq;
/* MSI-X */
@@ -109,6 +149,7 @@ struct rvu {
u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for
* every cgx lmac port
*/
+ unsigned long pf_notify_bmap; /* Flags for PF notification */
void **cgx_idmap; /* cgx id to cgx data map table */
struct work_struct cgx_evh_work;
struct workqueue_struct *cgx_evh_wq;
@@ -149,10 +190,84 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc);
void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf);
bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr);
int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot);
+int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf);
int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc);
int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero);
+/* NPA/NIX AQ APIs */
+int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue,
+ int qsize, int inst_size, int res_size);
+void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq);
+
/* CGX APIs */
+static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf)
+{
+ return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs);
+}
+
+static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id)
+{
+ *cgx_id = (map >> 4) & 0xF;
+ *lmac_id = (map & 0xF);
+}
+
int rvu_cgx_probe(struct rvu *rvu);
void rvu_cgx_wq_destroy(struct rvu *rvu);
+int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start);
+int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req,
+ struct cgx_stats_rsp *rsp);
+int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu,
+ struct cgx_mac_addr_set_or_get *req,
+ struct cgx_mac_addr_set_or_get *rsp);
+int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu,
+ struct cgx_mac_addr_set_or_get *req,
+ struct cgx_mac_addr_set_or_get *rsp);
+int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_PROMISC_DISABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req,
+ struct cgx_link_info_msg *rsp);
+int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+
+/* NPA APIs */
+int rvu_npa_init(struct rvu *rvu);
+void rvu_npa_freemem(struct rvu *rvu);
+int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu,
+ struct npa_aq_enq_req *req,
+ struct npa_aq_enq_rsp *rsp);
+int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu,
+ struct hwctx_disable_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu,
+ struct npa_lf_alloc_req *req,
+ struct npa_lf_alloc_rsp *rsp);
+int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+
+/* NIX APIs */
+int rvu_nix_init(struct rvu *rvu);
+void rvu_nix_freemem(struct rvu *rvu);
+int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
+ struct nix_lf_alloc_req *req,
+ struct nix_lf_alloc_rsp *rsp);
+int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp);
+int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu,
+ struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp);
+int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu,
+ struct hwctx_disable_req *req,
+ struct msg_rsp *rsp);
#endif /* RVU_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 5ecc22308b22..e0aee2176637 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -20,6 +20,31 @@ struct cgx_evq_entry {
struct cgx_link_event link_event;
};
+#define M(_name, _id, _req_type, _rsp_type) \
+static struct _req_type __maybe_unused \
+*otx2_mbox_alloc_msg_ ## _name(struct rvu *rvu, int devid) \
+{ \
+ struct _req_type *req; \
+ \
+ req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \
+ &rvu->mbox_up, devid, sizeof(struct _req_type), \
+ sizeof(struct _rsp_type)); \
+ if (!req) \
+ return NULL; \
+ req->hdr.sig = OTX2_MBOX_REQ_SIG; \
+ req->hdr.id = _id; \
+ return req; \
+}
+
+MBOX_UP_CGX_MESSAGES
+#undef M
+
+/* Returns bitmap of mapped PFs */
+static inline u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id)
+{
+ return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id];
+}
+
static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id)
{
return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF);
@@ -77,6 +102,34 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
return 0;
}
+static int rvu_cgx_send_link_info(int cgx_id, int lmac_id, struct rvu *rvu)
+{
+ struct cgx_evq_entry *qentry;
+ unsigned long flags;
+ int err;
+
+ qentry = kmalloc(sizeof(*qentry), GFP_KERNEL);
+ if (!qentry)
+ return -ENOMEM;
+
+ /* Lock the event queue before we read the local link status */
+ spin_lock_irqsave(&rvu->cgx_evq_lock, flags);
+ err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id,
+ &qentry->link_event.link_uinfo);
+ qentry->link_event.cgx_id = cgx_id;
+ qentry->link_event.lmac_id = lmac_id;
+ if (err)
+ goto skip_add;
+ list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head);
+skip_add:
+ spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags);
+
+ /* start worker to process the events */
+ queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work);
+
+ return 0;
+}
+
/* This is called from interrupt context and is expected to be atomic */
static int cgx_lmac_postevent(struct cgx_link_event *event, void *data)
{
@@ -98,6 +151,41 @@ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data)
return 0;
}
+static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu)
+{
+ struct cgx_link_user_info *linfo;
+ struct cgx_link_info_msg *msg;
+ unsigned long pfmap;
+ int err, pfid;
+
+ linfo = &event->link_uinfo;
+ pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id);
+
+ do {
+ pfid = find_first_bit(&pfmap, 16);
+ clear_bit(pfid, &pfmap);
+
+ /* check if notification is enabled */
+ if (!test_bit(pfid, &rvu->pf_notify_bmap)) {
+ dev_info(rvu->dev, "cgx %d: lmac %d Link status %s\n",
+ event->cgx_id, event->lmac_id,
+ linfo->link_up ? "UP" : "DOWN");
+ continue;
+ }
+
+ /* Send mbox message to PF */
+ msg = otx2_mbox_alloc_msg_CGX_LINK_EVENT(rvu, pfid);
+ if (!msg)
+ continue;
+ msg->link_info = *linfo;
+ otx2_mbox_msg_send(&rvu->mbox_up, pfid);
+ err = otx2_mbox_wait_for_rsp(&rvu->mbox_up, pfid);
+ if (err)
+ dev_warn(rvu->dev, "notification to pf %d failed\n",
+ pfid);
+ } while (pfmap);
+}
+
static void cgx_evhandler_task(struct work_struct *work)
{
struct rvu *rvu = container_of(work, struct rvu, cgx_evh_work);
@@ -119,7 +207,8 @@ static void cgx_evhandler_task(struct work_struct *work)
event = &qentry->link_event;
- /* Do nothing for now */
+ /* process event */
+ cgx_notify_pfs(event, rvu);
kfree(qentry);
} while (1);
}
@@ -192,3 +281,232 @@ int rvu_cgx_probe(struct rvu *rvu)
cgx_lmac_event_handler_init(rvu);
return 0;
}
+
+int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
+{
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id, lmac_id;
+
+ /* This msg is expected only from PFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start);
+
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, true);
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, false);
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req,
+ struct cgx_stats_rsp *rsp)
+{
+ int pf = rvu_get_pf(req->hdr.pcifunc);
+ int stat = 0, err = 0;
+ u64 tx_stat, rx_stat;
+ u8 cgx_idx, lmac;
+ void *cgxd;
+
+ if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) ||
+ !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
+ cgxd = rvu_cgx_pdata(cgx_idx, rvu);
+
+ /* Rx stats */
+ while (stat < CGX_RX_STATS_COUNT) {
+ err = cgx_get_rx_stats(cgxd, lmac, stat, &rx_stat);
+ if (err)
+ return err;
+ rsp->rx_stats[stat] = rx_stat;
+ stat++;
+ }
+
+ /* Tx stats */
+ stat = 0;
+ while (stat < CGX_TX_STATS_COUNT) {
+ err = cgx_get_tx_stats(cgxd, lmac, stat, &tx_stat);
+ if (err)
+ return err;
+ rsp->tx_stats[stat] = tx_stat;
+ stat++;
+ }
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu,
+ struct cgx_mac_addr_set_or_get *req,
+ struct cgx_mac_addr_set_or_get *rsp)
+{
+ int pf = rvu_get_pf(req->hdr.pcifunc);
+ u8 cgx_id, lmac_id;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr);
+
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu,
+ struct cgx_mac_addr_set_or_get *req,
+ struct cgx_mac_addr_set_or_get *rsp)
+{
+ int pf = rvu_get_pf(req->hdr.pcifunc);
+ u8 cgx_id, lmac_id;
+ int rc = 0, i;
+ u64 cfg;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ rsp->hdr.rc = rc;
+ cfg = cgx_lmac_addr_get(cgx_id, lmac_id);
+ /* copy 48 bit mac address to req->mac_addr */
+ for (i = 0; i < ETH_ALEN; i++)
+ rsp->mac_addr[i] = cfg >> (ETH_ALEN - 1 - i) * 8;
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ u16 pcifunc = req->hdr.pcifunc;
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id, lmac_id;
+
+ /* This msg is expected only from PFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) ||
+ !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ cgx_lmac_promisc_config(cgx_id, lmac_id, true);
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_PROMISC_DISABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ u16 pcifunc = req->hdr.pcifunc;
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id, lmac_id;
+
+ /* This msg is expected only from PFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) ||
+ !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ cgx_lmac_promisc_config(cgx_id, lmac_id, false);
+ return 0;
+}
+
+static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en)
+{
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id, lmac_id;
+
+ /* This msg is expected only from PFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ if (en) {
+ set_bit(pf, &rvu->pf_notify_bmap);
+ /* Send the current link status to PF */
+ rvu_cgx_send_link_info(cgx_id, lmac_id, rvu);
+ } else {
+ clear_bit(pf, &rvu->pf_notify_bmap);
+ }
+
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, true);
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, false);
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req,
+ struct cgx_link_info_msg *rsp)
+{
+ u8 cgx_id, lmac_id;
+ int pf, err;
+
+ pf = rvu_get_pf(req->hdr.pcifunc);
+
+ if (!is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id,
+ &rsp->link_info);
+ return err;
+}
+
+static int rvu_cgx_config_intlbk(struct rvu *rvu, u16 pcifunc, bool en)
+{
+ int pf = rvu_get_pf(pcifunc);
+ u8 cgx_id, lmac_id;
+
+ /* This msg is expected only from PFs that are mapped to CGX LMACs,
+ * if received from other PF/VF simply ACK, nothing to do.
+ */
+ if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf))
+ return -ENODEV;
+
+ rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+
+ return cgx_lmac_internal_loopback(rvu_cgx_pdata(cgx_id, rvu),
+ lmac_id, en);
+}
+
+int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, true);
+ return 0;
+}
+
+int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, false);
+ return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
new file mode 100644
index 000000000000..214ca2c26ab4
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -0,0 +1,892 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu_struct.h"
+#include "rvu_reg.h"
+#include "rvu.h"
+#include "cgx.h"
+
+static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
+{
+ if (blkaddr == BLKADDR_NIX0 && hw->nix0)
+ return hw->nix0;
+
+ return NULL;
+}
+
+static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
+ int lvl, u16 pcifunc, u16 schq)
+{
+ struct nix_txsch *txsch;
+ struct nix_hw *nix_hw;
+
+ nix_hw = get_nix_hw(rvu->hw, blkaddr);
+ if (!nix_hw)
+ return false;
+
+ txsch = &nix_hw->txsch[lvl];
+ /* Check out of bounds */
+ if (schq >= txsch->schq.max)
+ return false;
+
+ spin_lock(&rvu->rsrc_lock);
+ if (txsch->pfvf_map[schq] != pcifunc) {
+ spin_unlock(&rvu->rsrc_lock);
+ return false;
+ }
+ spin_unlock(&rvu->rsrc_lock);
+ return true;
+}
+
+static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr,
+ u64 format, bool v4, u64 *fidx)
+{
+ struct nix_lso_format field = {0};
+
+ /* IP's Length field */
+ field.layer = NIX_TXLAYER_OL3;
+ /* In ipv4, length field is at offset 2 bytes, for ipv6 it's 4 */
+ field.offset = v4 ? 2 : 4;
+ field.sizem1 = 1; /* i.e 2 bytes */
+ field.alg = NIX_LSOALG_ADD_PAYLEN;
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+ *(u64 *)&field);
+
+ /* No ID field in IPv6 header */
+ if (!v4)
+ return;
+
+ /* IP's ID field */
+ field.layer = NIX_TXLAYER_OL3;
+ field.offset = 4;
+ field.sizem1 = 1; /* i.e 2 bytes */
+ field.alg = NIX_LSOALG_ADD_SEGNUM;
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+ *(u64 *)&field);
+}
+
+static void nix_setup_lso_tso_l4(struct rvu *rvu, int blkaddr,
+ u64 format, u64 *fidx)
+{
+ struct nix_lso_format field = {0};
+
+ /* TCP's sequence number field */
+ field.layer = NIX_TXLAYER_OL4;
+ field.offset = 4;
+ field.sizem1 = 3; /* i.e 4 bytes */
+ field.alg = NIX_LSOALG_ADD_OFFSET;
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+ *(u64 *)&field);
+
+ /* TCP's flags field */
+ field.layer = NIX_TXLAYER_OL4;
+ field.offset = 12;
+ field.sizem1 = 0; /* not needed */
+ field.alg = NIX_LSOALG_TCP_FLAGS;
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++),
+ *(u64 *)&field);
+}
+
+static void nix_setup_lso(struct rvu *rvu, int blkaddr)
+{
+ u64 cfg, idx, fidx = 0;
+
+ /* Enable LSO */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_LSO_CFG);
+ /* For TSO, set first and middle segment flags to
+ * mask out PSH, RST & FIN flags in TCP packet
+ */
+ cfg &= ~((0xFFFFULL << 32) | (0xFFFFULL << 16));
+ cfg |= (0xFFF2ULL << 32) | (0xFFF2ULL << 16);
+ rvu_write64(rvu, blkaddr, NIX_AF_LSO_CFG, cfg | BIT_ULL(63));
+
+ /* Configure format fields for TCPv4 segmentation offload */
+ idx = NIX_LSO_FORMAT_IDX_TSOV4;
+ nix_setup_lso_tso_l3(rvu, blkaddr, idx, true, &fidx);
+ nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx);
+
+ /* Set rest of the fields to NOP */
+ for (; fidx < 8; fidx++) {
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL);
+ }
+
+ /* Configure format fields for TCPv6 segmentation offload */
+ idx = NIX_LSO_FORMAT_IDX_TSOV6;
+ fidx = 0;
+ nix_setup_lso_tso_l3(rvu, blkaddr, idx, false, &fidx);
+ nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx);
+
+ /* Set rest of the fields to NOP */
+ for (; fidx < 8; fidx++) {
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL);
+ }
+}
+
+static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf)
+{
+ kfree(pfvf->rq_bmap);
+ kfree(pfvf->sq_bmap);
+ kfree(pfvf->cq_bmap);
+ if (pfvf->rq_ctx)
+ qmem_free(rvu->dev, pfvf->rq_ctx);
+ if (pfvf->sq_ctx)
+ qmem_free(rvu->dev, pfvf->sq_ctx);
+ if (pfvf->cq_ctx)
+ qmem_free(rvu->dev, pfvf->cq_ctx);
+ if (pfvf->rss_ctx)
+ qmem_free(rvu->dev, pfvf->rss_ctx);
+ if (pfvf->nix_qints_ctx)
+ qmem_free(rvu->dev, pfvf->nix_qints_ctx);
+ if (pfvf->cq_ints_ctx)
+ qmem_free(rvu->dev, pfvf->cq_ints_ctx);
+
+ pfvf->rq_bmap = NULL;
+ pfvf->cq_bmap = NULL;
+ pfvf->sq_bmap = NULL;
+ pfvf->rq_ctx = NULL;
+ pfvf->sq_ctx = NULL;
+ pfvf->cq_ctx = NULL;
+ pfvf->rss_ctx = NULL;
+ pfvf->nix_qints_ctx = NULL;
+ pfvf->cq_ints_ctx = NULL;
+}
+
+static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr,
+ struct rvu_pfvf *pfvf, int nixlf,
+ int rss_sz, int rss_grps, int hwctx_size)
+{
+ int err, grp, num_indices;
+
+ /* RSS is not requested for this NIXLF */
+ if (!rss_sz)
+ return 0;
+ num_indices = rss_sz * rss_grps;
+
+ /* Alloc NIX RSS HW context memory and config the base */
+ err = qmem_alloc(rvu->dev, &pfvf->rss_ctx, num_indices, hwctx_size);
+ if (err)
+ return err;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_BASE(nixlf),
+ (u64)pfvf->rss_ctx->iova);
+
+ /* Config full RSS table size, enable RSS and caching */
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf),
+ BIT_ULL(36) | BIT_ULL(4) |
+ ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE));
+ /* Config RSS group offset and sizes */
+ for (grp = 0; grp < rss_grps; grp++)
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp),
+ ((ilog2(rss_sz) - 1) << 16) | (rss_sz * grp));
+ return 0;
+}
+
+static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+ struct nix_aq_inst_s *inst)
+{
+ struct admin_queue *aq = block->aq;
+ struct nix_aq_res_s *result;
+ int timeout = 1000;
+ u64 reg, head;
+
+ result = (struct nix_aq_res_s *)aq->res->base;
+
+ /* Get current head pointer where to append this instruction */
+ reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS);
+ head = (reg >> 4) & AQ_PTR_MASK;
+
+ memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)),
+ (void *)inst, aq->inst->entry_sz);
+ memset(result, 0, sizeof(*result));
+ /* sync into memory */
+ wmb();
+
+ /* Ring the doorbell and wait for result */
+ rvu_write64(rvu, block->addr, NIX_AF_AQ_DOOR, 1);
+ while (result->compcode == NIX_AQ_COMP_NOTDONE) {
+ cpu_relax();
+ udelay(1);
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ }
+
+ if (result->compcode != NIX_AQ_COMP_GOOD)
+ /* TODO: Replace this with some error code */
+ return -EBUSY;
+
+ return 0;
+}
+
+static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ int nixlf, blkaddr, rc = 0;
+ struct nix_aq_inst_s inst;
+ struct rvu_block *block;
+ struct admin_queue *aq;
+ struct rvu_pfvf *pfvf;
+ void *ctx, *mask;
+ bool ena;
+ u64 cfg;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (!pfvf->nixlf || blkaddr < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ aq = block->aq;
+ if (!aq) {
+ dev_warn(rvu->dev, "%s: NIX AQ not initialized\n", __func__);
+ return NIX_AF_ERR_AQ_ENQUEUE;
+ }
+
+ nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (nixlf < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ switch (req->ctype) {
+ case NIX_AQ_CTYPE_RQ:
+ /* Check if index exceeds max no of queues */
+ if (!pfvf->rq_ctx || req->qidx >= pfvf->rq_ctx->qsize)
+ rc = NIX_AF_ERR_AQ_ENQUEUE;
+ break;
+ case NIX_AQ_CTYPE_SQ:
+ if (!pfvf->sq_ctx || req->qidx >= pfvf->sq_ctx->qsize)
+ rc = NIX_AF_ERR_AQ_ENQUEUE;
+ break;
+ case NIX_AQ_CTYPE_CQ:
+ if (!pfvf->cq_ctx || req->qidx >= pfvf->cq_ctx->qsize)
+ rc = NIX_AF_ERR_AQ_ENQUEUE;
+ break;
+ case NIX_AQ_CTYPE_RSS:
+ /* Check if RSS is enabled and qidx is within range */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf));
+ if (!(cfg & BIT_ULL(4)) || !pfvf->rss_ctx ||
+ (req->qidx >= (256UL << (cfg & 0xF))))
+ rc = NIX_AF_ERR_AQ_ENQUEUE;
+ break;
+ default:
+ rc = NIX_AF_ERR_AQ_ENQUEUE;
+ }
+
+ if (rc)
+ return rc;
+
+ /* Check if SQ pointed SMQ belongs to this PF/VF or not */
+ if (req->ctype == NIX_AQ_CTYPE_SQ &&
+ req->op != NIX_AQ_INSTOP_WRITE) {
+ if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
+ pcifunc, req->sq.smq))
+ return NIX_AF_ERR_AQ_ENQUEUE;
+ }
+
+ memset(&inst, 0, sizeof(struct nix_aq_inst_s));
+ inst.lf = nixlf;
+ inst.cindex = req->qidx;
+ inst.ctype = req->ctype;
+ inst.op = req->op;
+ /* Currently we are not supporting enqueuing multiple instructions,
+ * so always choose first entry in result memory.
+ */
+ inst.res_addr = (u64)aq->res->iova;
+
+ /* Clean result + context memory */
+ memset(aq->res->base, 0, aq->res->entry_sz);
+ /* Context needs to be written at RES_ADDR + 128 */
+ ctx = aq->res->base + 128;
+ /* Mask needs to be written at RES_ADDR + 256 */
+ mask = aq->res->base + 256;
+
+ switch (req->op) {
+ case NIX_AQ_INSTOP_WRITE:
+ if (req->ctype == NIX_AQ_CTYPE_RQ)
+ memcpy(mask, &req->rq_mask,
+ sizeof(struct nix_rq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_SQ)
+ memcpy(mask, &req->sq_mask,
+ sizeof(struct nix_sq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_CQ)
+ memcpy(mask, &req->cq_mask,
+ sizeof(struct nix_cq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_RSS)
+ memcpy(mask, &req->rss_mask,
+ sizeof(struct nix_rsse_s));
+ /* Fall through */
+ case NIX_AQ_INSTOP_INIT:
+ if (req->ctype == NIX_AQ_CTYPE_RQ)
+ memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_SQ)
+ memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_CQ)
+ memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_RSS)
+ memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s));
+ break;
+ case NIX_AQ_INSTOP_NOP:
+ case NIX_AQ_INSTOP_READ:
+ case NIX_AQ_INSTOP_LOCK:
+ case NIX_AQ_INSTOP_UNLOCK:
+ break;
+ default:
+ rc = NIX_AF_ERR_AQ_ENQUEUE;
+ return rc;
+ }
+
+ spin_lock(&aq->lock);
+
+ /* Submit the instruction to AQ */
+ rc = nix_aq_enqueue_wait(rvu, block, &inst);
+ if (rc) {
+ spin_unlock(&aq->lock);
+ return rc;
+ }
+
+ /* Set RQ/SQ/CQ bitmap if respective queue hw context is enabled */
+ if (req->op == NIX_AQ_INSTOP_INIT) {
+ if (req->ctype == NIX_AQ_CTYPE_RQ && req->rq.ena)
+ __set_bit(req->qidx, pfvf->rq_bmap);
+ if (req->ctype == NIX_AQ_CTYPE_SQ && req->sq.ena)
+ __set_bit(req->qidx, pfvf->sq_bmap);
+ if (req->ctype == NIX_AQ_CTYPE_CQ && req->cq.ena)
+ __set_bit(req->qidx, pfvf->cq_bmap);
+ }
+
+ if (req->op == NIX_AQ_INSTOP_WRITE) {
+ if (req->ctype == NIX_AQ_CTYPE_RQ) {
+ ena = (req->rq.ena & req->rq_mask.ena) |
+ (test_bit(req->qidx, pfvf->rq_bmap) &
+ ~req->rq_mask.ena);
+ if (ena)
+ __set_bit(req->qidx, pfvf->rq_bmap);
+ else
+ __clear_bit(req->qidx, pfvf->rq_bmap);
+ }
+ if (req->ctype == NIX_AQ_CTYPE_SQ) {
+ ena = (req->rq.ena & req->sq_mask.ena) |
+ (test_bit(req->qidx, pfvf->sq_bmap) &
+ ~req->sq_mask.ena);
+ if (ena)
+ __set_bit(req->qidx, pfvf->sq_bmap);
+ else
+ __clear_bit(req->qidx, pfvf->sq_bmap);
+ }
+ if (req->ctype == NIX_AQ_CTYPE_CQ) {
+ ena = (req->rq.ena & req->cq_mask.ena) |
+ (test_bit(req->qidx, pfvf->cq_bmap) &
+ ~req->cq_mask.ena);
+ if (ena)
+ __set_bit(req->qidx, pfvf->cq_bmap);
+ else
+ __clear_bit(req->qidx, pfvf->cq_bmap);
+ }
+ }
+
+ if (rsp) {
+ /* Copy read context into mailbox */
+ if (req->op == NIX_AQ_INSTOP_READ) {
+ if (req->ctype == NIX_AQ_CTYPE_RQ)
+ memcpy(&rsp->rq, ctx,
+ sizeof(struct nix_rq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_SQ)
+ memcpy(&rsp->sq, ctx,
+ sizeof(struct nix_sq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_CQ)
+ memcpy(&rsp->cq, ctx,
+ sizeof(struct nix_cq_ctx_s));
+ else if (req->ctype == NIX_AQ_CTYPE_RSS)
+ memcpy(&rsp->rss, ctx,
+ sizeof(struct nix_cq_ctx_s));
+ }
+ }
+
+ spin_unlock(&aq->lock);
+ return 0;
+}
+
+static int nix_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req)
+{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+ struct nix_aq_enq_req aq_req;
+ unsigned long *bmap;
+ int qidx, q_cnt = 0;
+ int err = 0, rc;
+
+ if (!pfvf->cq_ctx || !pfvf->sq_ctx || !pfvf->rq_ctx)
+ return NIX_AF_ERR_AQ_ENQUEUE;
+
+ memset(&aq_req, 0, sizeof(struct nix_aq_enq_req));
+ aq_req.hdr.pcifunc = req->hdr.pcifunc;
+
+ if (req->ctype == NIX_AQ_CTYPE_CQ) {
+ aq_req.cq.ena = 0;
+ aq_req.cq_mask.ena = 1;
+ q_cnt = pfvf->cq_ctx->qsize;
+ bmap = pfvf->cq_bmap;
+ }
+ if (req->ctype == NIX_AQ_CTYPE_SQ) {
+ aq_req.sq.ena = 0;
+ aq_req.sq_mask.ena = 1;
+ q_cnt = pfvf->sq_ctx->qsize;
+ bmap = pfvf->sq_bmap;
+ }
+ if (req->ctype == NIX_AQ_CTYPE_RQ) {
+ aq_req.rq.ena = 0;
+ aq_req.rq_mask.ena = 1;
+ q_cnt = pfvf->rq_ctx->qsize;
+ bmap = pfvf->rq_bmap;
+ }
+
+ aq_req.ctype = req->ctype;
+ aq_req.op = NIX_AQ_INSTOP_WRITE;
+
+ for (qidx = 0; qidx < q_cnt; qidx++) {
+ if (!test_bit(qidx, bmap))
+ continue;
+ aq_req.qidx = qidx;
+ rc = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL);
+ if (rc) {
+ err = rc;
+ dev_err(rvu->dev, "Failed to disable %s:%d context\n",
+ (req->ctype == NIX_AQ_CTYPE_CQ) ?
+ "CQ" : ((req->ctype == NIX_AQ_CTYPE_RQ) ?
+ "RQ" : "SQ"), qidx);
+ }
+ }
+
+ return err;
+}
+
+int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu,
+ struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp)
+{
+ return rvu_nix_aq_enq_inst(rvu, req, rsp);
+}
+
+int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu,
+ struct hwctx_disable_req *req,
+ struct msg_rsp *rsp)
+{
+ return nix_lf_hwctx_disable(rvu, req);
+}
+
+int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu,
+ struct nix_lf_alloc_req *req,
+ struct nix_lf_alloc_rsp *rsp)
+{
+ int nixlf, qints, hwctx_size, err, rc = 0;
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ struct rvu_block *block;
+ struct rvu_pfvf *pfvf;
+ u64 cfg, ctx_cfg;
+ int blkaddr;
+
+ if (!req->rq_cnt || !req->sq_cnt || !req->cq_cnt)
+ return NIX_AF_ERR_PARAM;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (!pfvf->nixlf || blkaddr < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (nixlf < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ /* If RSS is being enabled, check if requested config is valid.
+ * RSS table size should be power of two, otherwise
+ * RSS_GRP::OFFSET + adder might go beyond that group or
+ * won't be able to use entire table.
+ */
+ if (req->rss_sz && (req->rss_sz > MAX_RSS_INDIR_TBL_SIZE ||
+ !is_power_of_2(req->rss_sz)))
+ return NIX_AF_ERR_RSS_SIZE_INVALID;
+
+ if (req->rss_sz &&
+ (!req->rss_grps || req->rss_grps > MAX_RSS_GROUPS))
+ return NIX_AF_ERR_RSS_GRPS_INVALID;
+
+ /* Reset this NIX LF */
+ err = rvu_lf_reset(rvu, block, nixlf);
+ if (err) {
+ dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n",
+ block->addr - BLKADDR_NIX0, nixlf);
+ return NIX_AF_ERR_LF_RESET;
+ }
+
+ ctx_cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST3);
+
+ /* Alloc NIX RQ HW context memory and config the base */
+ hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->rq_ctx, req->rq_cnt, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ pfvf->rq_bmap = kcalloc(req->rq_cnt, sizeof(long), GFP_KERNEL);
+ if (!pfvf->rq_bmap)
+ goto free_mem;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_BASE(nixlf),
+ (u64)pfvf->rq_ctx->iova);
+
+ /* Set caching and queue count in HW */
+ cfg = BIT_ULL(36) | (req->rq_cnt - 1);
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_CFG(nixlf), cfg);
+
+ /* Alloc NIX SQ HW context memory and config the base */
+ hwctx_size = 1UL << (ctx_cfg & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->sq_ctx, req->sq_cnt, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ pfvf->sq_bmap = kcalloc(req->sq_cnt, sizeof(long), GFP_KERNEL);
+ if (!pfvf->sq_bmap)
+ goto free_mem;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_BASE(nixlf),
+ (u64)pfvf->sq_ctx->iova);
+ cfg = BIT_ULL(36) | (req->sq_cnt - 1);
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_CFG(nixlf), cfg);
+
+ /* Alloc NIX CQ HW context memory and config the base */
+ hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->cq_ctx, req->cq_cnt, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ pfvf->cq_bmap = kcalloc(req->cq_cnt, sizeof(long), GFP_KERNEL);
+ if (!pfvf->cq_bmap)
+ goto free_mem;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_BASE(nixlf),
+ (u64)pfvf->cq_ctx->iova);
+ cfg = BIT_ULL(36) | (req->cq_cnt - 1);
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_CFG(nixlf), cfg);
+
+ /* Initialize receive side scaling (RSS) */
+ hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF);
+ err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf,
+ req->rss_sz, req->rss_grps, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ /* Alloc memory for CQINT's HW contexts */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2);
+ qints = (cfg >> 24) & 0xFFF;
+ hwctx_size = 1UL << ((ctx_cfg >> 24) & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->cq_ints_ctx, qints, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_BASE(nixlf),
+ (u64)pfvf->cq_ints_ctx->iova);
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_CFG(nixlf), BIT_ULL(36));
+
+ /* Alloc memory for QINT's HW contexts */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2);
+ qints = (cfg >> 12) & 0xFFF;
+ hwctx_size = 1UL << ((ctx_cfg >> 20) & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->nix_qints_ctx, qints, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_BASE(nixlf),
+ (u64)pfvf->nix_qints_ctx->iova);
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_CFG(nixlf), BIT_ULL(36));
+
+ /* Enable LMTST for this NIX LF */
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG2(nixlf), BIT_ULL(0));
+
+ /* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC
+ * If requester has sent a 'RVU_DEFAULT_PF_FUNC' use this NIX LF's
+ * PCIFUNC itself.
+ */
+ if (req->npa_func == RVU_DEFAULT_PF_FUNC)
+ cfg = pcifunc;
+ else
+ cfg = req->npa_func;
+
+ if (req->sso_func == RVU_DEFAULT_PF_FUNC)
+ cfg |= (u64)pcifunc << 16;
+ else
+ cfg |= (u64)req->sso_func << 16;
+
+ cfg |= (u64)req->xqe_sz << 33;
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf), cfg);
+
+ /* Config Rx pkt length, csum checks and apad enable / disable */
+ rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg);
+
+ goto exit;
+
+free_mem:
+ nix_ctx_free(rvu, pfvf);
+ rc = -ENOMEM;
+
+exit:
+ /* Set macaddr of this PF/VF */
+ ether_addr_copy(rsp->mac_addr, pfvf->mac_addr);
+
+ /* set SQB size info */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQ_CONST);
+ rsp->sqb_size = (cfg >> 34) & 0xFFFF;
+ rsp->lso_tsov4_idx = NIX_LSO_FORMAT_IDX_TSOV4;
+ rsp->lso_tsov6_idx = NIX_LSO_FORMAT_IDX_TSOV6;
+ return rc;
+}
+
+int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ struct rvu_block *block;
+ int blkaddr, nixlf, err;
+ struct rvu_pfvf *pfvf;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+ if (!pfvf->nixlf || blkaddr < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (nixlf < 0)
+ return NIX_AF_ERR_AF_LF_INVALID;
+
+ /* Reset this NIX LF */
+ err = rvu_lf_reset(rvu, block, nixlf);
+ if (err) {
+ dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n",
+ block->addr - BLKADDR_NIX0, nixlf);
+ return NIX_AF_ERR_LF_RESET;
+ }
+
+ nix_ctx_free(rvu, pfvf);
+
+ return 0;
+}
+
+static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
+{
+ struct nix_txsch *txsch;
+ u64 cfg, reg;
+ int err, lvl;
+
+ /* Get scheduler queue count of each type and alloc
+ * bitmap for each for alloc/free/attach operations.
+ */
+ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+ txsch = &nix_hw->txsch[lvl];
+ txsch->lvl = lvl;
+ switch (lvl) {
+ case NIX_TXSCH_LVL_SMQ:
+ reg = NIX_AF_MDQ_CONST;
+ break;
+ case NIX_TXSCH_LVL_TL4:
+ reg = NIX_AF_TL4_CONST;
+ break;
+ case NIX_TXSCH_LVL_TL3:
+ reg = NIX_AF_TL3_CONST;
+ break;
+ case NIX_TXSCH_LVL_TL2:
+ reg = NIX_AF_TL2_CONST;
+ break;
+ case NIX_TXSCH_LVL_TL1:
+ reg = NIX_AF_TL1_CONST;
+ break;
+ }
+ cfg = rvu_read64(rvu, blkaddr, reg);
+ txsch->schq.max = cfg & 0xFFFF;
+ err = rvu_alloc_bitmap(&txsch->schq);
+ if (err)
+ return err;
+
+ /* Allocate memory for scheduler queues to
+ * PF/VF pcifunc mapping info.
+ */
+ txsch->pfvf_map = devm_kcalloc(rvu->dev, txsch->schq.max,
+ sizeof(u16), GFP_KERNEL);
+ if (!txsch->pfvf_map)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int nix_calibrate_x2p(struct rvu *rvu, int blkaddr)
+{
+ int idx, err;
+ u64 status;
+
+ /* Start X2P bus calibration */
+ rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+ rvu_read64(rvu, blkaddr, NIX_AF_CFG) | BIT_ULL(9));
+ /* Wait for calibration to complete */
+ err = rvu_poll_reg(rvu, blkaddr,
+ NIX_AF_STATUS, BIT_ULL(10), false);
+ if (err) {
+ dev_err(rvu->dev, "NIX X2P bus calibration failed\n");
+ return err;
+ }
+
+ status = rvu_read64(rvu, blkaddr, NIX_AF_STATUS);
+ /* Check if CGX devices are ready */
+ for (idx = 0; idx < cgx_get_cgx_cnt(); idx++) {
+ if (status & (BIT_ULL(16 + idx)))
+ continue;
+ dev_err(rvu->dev,
+ "CGX%d didn't respond to NIX X2P calibration\n", idx);
+ err = -EBUSY;
+ }
+
+ /* Check if LBK is ready */
+ if (!(status & BIT_ULL(19))) {
+ dev_err(rvu->dev,
+ "LBK didn't respond to NIX X2P calibration\n");
+ err = -EBUSY;
+ }
+
+ /* Clear 'calibrate_x2p' bit */
+ rvu_write64(rvu, blkaddr, NIX_AF_CFG,
+ rvu_read64(rvu, blkaddr, NIX_AF_CFG) & ~BIT_ULL(9));
+ if (err || (status & 0x3FFULL))
+ dev_err(rvu->dev,
+ "NIX X2P calibration failed, status 0x%llx\n", status);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
+{
+ u64 cfg;
+ int err;
+
+ /* Set admin queue endianness */
+ cfg = rvu_read64(rvu, block->addr, NIX_AF_CFG);
+#ifdef __BIG_ENDIAN
+ cfg |= BIT_ULL(1);
+ rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg);
+#else
+ cfg &= ~BIT_ULL(1);
+ rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg);
+#endif
+
+ /* Do not bypass NDC cache */
+ cfg = rvu_read64(rvu, block->addr, NIX_AF_NDC_CFG);
+ cfg &= ~0x3FFEULL;
+ rvu_write64(rvu, block->addr, NIX_AF_NDC_CFG, cfg);
+
+ /* Result structure can be followed by RQ/SQ/CQ context at
+ * RES + 128bytes and a write mask at RES + 256 bytes, depending on
+ * operation type. Alloc sufficient result memory for all operations.
+ */
+ err = rvu_aq_alloc(rvu, &block->aq,
+ Q_COUNT(AQ_SIZE), sizeof(struct nix_aq_inst_s),
+ ALIGN(sizeof(struct nix_aq_res_s), 128) + 256);
+ if (err)
+ return err;
+
+ rvu_write64(rvu, block->addr, NIX_AF_AQ_CFG, AQ_SIZE);
+ rvu_write64(rvu, block->addr,
+ NIX_AF_AQ_BASE, (u64)block->aq->inst->iova);
+ return 0;
+}
+
+int rvu_nix_init(struct rvu *rvu)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct rvu_block *block;
+ int blkaddr, err;
+ u64 cfg;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+ if (blkaddr < 0)
+ return 0;
+ block = &hw->block[blkaddr];
+
+ /* Calibrate X2P bus to check if CGX/LBK links are fine */
+ err = nix_calibrate_x2p(rvu, blkaddr);
+ if (err)
+ return err;
+
+ /* Set num of links of each type */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+ hw->cgx = (cfg >> 12) & 0xF;
+ hw->lmac_per_cgx = (cfg >> 8) & 0xF;
+ hw->cgx_links = hw->cgx * hw->lmac_per_cgx;
+ hw->lbk_links = 1;
+ hw->sdp_links = 1;
+
+ /* Initialize admin queue */
+ err = nix_aq_init(rvu, block);
+ if (err)
+ return err;
+
+ /* Restore CINT timer delay to HW reset values */
+ rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL);
+
+ /* Configure segmentation offload formats */
+ nix_setup_lso(rvu, blkaddr);
+
+ if (blkaddr == BLKADDR_NIX0) {
+ hw->nix0 = devm_kzalloc(rvu->dev,
+ sizeof(struct nix_hw), GFP_KERNEL);
+ if (!hw->nix0)
+ return -ENOMEM;
+
+ err = nix_setup_txschq(rvu, hw->nix0, blkaddr);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+void rvu_nix_freemem(struct rvu *rvu)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct rvu_block *block;
+ struct nix_txsch *txsch;
+ struct nix_hw *nix_hw;
+ int blkaddr, lvl;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+ if (blkaddr < 0)
+ return;
+
+ block = &hw->block[blkaddr];
+ rvu_aq_free(rvu, block->aq);
+
+ if (blkaddr == BLKADDR_NIX0) {
+ nix_hw = get_nix_hw(rvu->hw, blkaddr);
+ if (!nix_hw)
+ return;
+
+ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+ txsch = &nix_hw->txsch[lvl];
+ kfree(txsch->schq.bmap);
+ }
+ }
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
new file mode 100644
index 000000000000..0e43a693d119
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver
+ *
+ * Copyright (C) 2018 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "rvu_struct.h"
+#include "rvu_reg.h"
+#include "rvu.h"
+
+static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+ struct npa_aq_inst_s *inst)
+{
+ struct admin_queue *aq = block->aq;
+ struct npa_aq_res_s *result;
+ int timeout = 1000;
+ u64 reg, head;
+
+ result = (struct npa_aq_res_s *)aq->res->base;
+
+ /* Get current head pointer where to append this instruction */
+ reg = rvu_read64(rvu, block->addr, NPA_AF_AQ_STATUS);
+ head = (reg >> 4) & AQ_PTR_MASK;
+
+ memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)),
+ (void *)inst, aq->inst->entry_sz);
+ memset(result, 0, sizeof(*result));
+ /* sync into memory */
+ wmb();
+
+ /* Ring the doorbell and wait for result */
+ rvu_write64(rvu, block->addr, NPA_AF_AQ_DOOR, 1);
+ while (result->compcode == NPA_AQ_COMP_NOTDONE) {
+ cpu_relax();
+ udelay(1);
+ timeout--;
+ if (!timeout)
+ return -EBUSY;
+ }
+
+ if (result->compcode != NPA_AQ_COMP_GOOD)
+ /* TODO: Replace this with some error code */
+ return -EBUSY;
+
+ return 0;
+}
+
+static int rvu_npa_aq_enq_inst(struct rvu *rvu, struct npa_aq_enq_req *req,
+ struct npa_aq_enq_rsp *rsp)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ int blkaddr, npalf, rc = 0;
+ struct npa_aq_inst_s inst;
+ struct rvu_block *block;
+ struct admin_queue *aq;
+ struct rvu_pfvf *pfvf;
+ void *ctx, *mask;
+ bool ena;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ if (!pfvf->aura_ctx || req->aura_id >= pfvf->aura_ctx->qsize)
+ return NPA_AF_ERR_AQ_ENQUEUE;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+ if (!pfvf->npalf || blkaddr < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ aq = block->aq;
+ if (!aq) {
+ dev_warn(rvu->dev, "%s: NPA AQ not initialized\n", __func__);
+ return NPA_AF_ERR_AQ_ENQUEUE;
+ }
+
+ npalf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (npalf < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ memset(&inst, 0, sizeof(struct npa_aq_inst_s));
+ inst.cindex = req->aura_id;
+ inst.lf = npalf;
+ inst.ctype = req->ctype;
+ inst.op = req->op;
+ /* Currently we are not supporting enqueuing multiple instructions,
+ * so always choose first entry in result memory.
+ */
+ inst.res_addr = (u64)aq->res->iova;
+
+ /* Clean result + context memory */
+ memset(aq->res->base, 0, aq->res->entry_sz);
+ /* Context needs to be written at RES_ADDR + 128 */
+ ctx = aq->res->base + 128;
+ /* Mask needs to be written at RES_ADDR + 256 */
+ mask = aq->res->base + 256;
+
+ switch (req->op) {
+ case NPA_AQ_INSTOP_WRITE:
+ /* Copy context and write mask */
+ if (req->ctype == NPA_AQ_CTYPE_AURA) {
+ memcpy(mask, &req->aura_mask,
+ sizeof(struct npa_aura_s));
+ memcpy(ctx, &req->aura, sizeof(struct npa_aura_s));
+ } else {
+ memcpy(mask, &req->pool_mask,
+ sizeof(struct npa_pool_s));
+ memcpy(ctx, &req->pool, sizeof(struct npa_pool_s));
+ }
+ break;
+ case NPA_AQ_INSTOP_INIT:
+ if (req->ctype == NPA_AQ_CTYPE_AURA) {
+ if (req->aura.pool_addr >= pfvf->pool_ctx->qsize) {
+ rc = NPA_AF_ERR_AQ_FULL;
+ break;
+ }
+ /* Set pool's context address */
+ req->aura.pool_addr = pfvf->pool_ctx->iova +
+ (req->aura.pool_addr * pfvf->pool_ctx->entry_sz);
+ memcpy(ctx, &req->aura, sizeof(struct npa_aura_s));
+ } else { /* POOL's context */
+ memcpy(ctx, &req->pool, sizeof(struct npa_pool_s));
+ }
+ break;
+ case NPA_AQ_INSTOP_NOP:
+ case NPA_AQ_INSTOP_READ:
+ case NPA_AQ_INSTOP_LOCK:
+ case NPA_AQ_INSTOP_UNLOCK:
+ break;
+ default:
+ rc = NPA_AF_ERR_AQ_FULL;
+ break;
+ }
+
+ if (rc)
+ return rc;
+
+ spin_lock(&aq->lock);
+
+ /* Submit the instruction to AQ */
+ rc = npa_aq_enqueue_wait(rvu, block, &inst);
+ if (rc) {
+ spin_unlock(&aq->lock);
+ return rc;
+ }
+
+ /* Set aura bitmap if aura hw context is enabled */
+ if (req->ctype == NPA_AQ_CTYPE_AURA) {
+ if (req->op == NPA_AQ_INSTOP_INIT && req->aura.ena)
+ __set_bit(req->aura_id, pfvf->aura_bmap);
+ if (req->op == NPA_AQ_INSTOP_WRITE) {
+ ena = (req->aura.ena & req->aura_mask.ena) |
+ (test_bit(req->aura_id, pfvf->aura_bmap) &
+ ~req->aura_mask.ena);
+ if (ena)
+ __set_bit(req->aura_id, pfvf->aura_bmap);
+ else
+ __clear_bit(req->aura_id, pfvf->aura_bmap);
+ }
+ }
+
+ /* Set pool bitmap if pool hw context is enabled */
+ if (req->ctype == NPA_AQ_CTYPE_POOL) {
+ if (req->op == NPA_AQ_INSTOP_INIT && req->pool.ena)
+ __set_bit(req->aura_id, pfvf->pool_bmap);
+ if (req->op == NPA_AQ_INSTOP_WRITE) {
+ ena = (req->pool.ena & req->pool_mask.ena) |
+ (test_bit(req->aura_id, pfvf->pool_bmap) &
+ ~req->pool_mask.ena);
+ if (ena)
+ __set_bit(req->aura_id, pfvf->pool_bmap);
+ else
+ __clear_bit(req->aura_id, pfvf->pool_bmap);
+ }
+ }
+ spin_unlock(&aq->lock);
+
+ if (rsp) {
+ /* Copy read context into mailbox */
+ if (req->op == NPA_AQ_INSTOP_READ) {
+ if (req->ctype == NPA_AQ_CTYPE_AURA)
+ memcpy(&rsp->aura, ctx,
+ sizeof(struct npa_aura_s));
+ else
+ memcpy(&rsp->pool, ctx,
+ sizeof(struct npa_pool_s));
+ }
+ }
+
+ return 0;
+}
+
+static int npa_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req)
+{
+ struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+ struct npa_aq_enq_req aq_req;
+ unsigned long *bmap;
+ int id, cnt = 0;
+ int err = 0, rc;
+
+ if (!pfvf->pool_ctx || !pfvf->aura_ctx)
+ return NPA_AF_ERR_AQ_ENQUEUE;
+
+ memset(&aq_req, 0, sizeof(struct npa_aq_enq_req));
+ aq_req.hdr.pcifunc = req->hdr.pcifunc;
+
+ if (req->ctype == NPA_AQ_CTYPE_POOL) {
+ aq_req.pool.ena = 0;
+ aq_req.pool_mask.ena = 1;
+ cnt = pfvf->pool_ctx->qsize;
+ bmap = pfvf->pool_bmap;
+ } else if (req->ctype == NPA_AQ_CTYPE_AURA) {
+ aq_req.aura.ena = 0;
+ aq_req.aura_mask.ena = 1;
+ cnt = pfvf->aura_ctx->qsize;
+ bmap = pfvf->aura_bmap;
+ }
+
+ aq_req.ctype = req->ctype;
+ aq_req.op = NPA_AQ_INSTOP_WRITE;
+
+ for (id = 0; id < cnt; id++) {
+ if (!test_bit(id, bmap))
+ continue;
+ aq_req.aura_id = id;
+ rc = rvu_npa_aq_enq_inst(rvu, &aq_req, NULL);
+ if (rc) {
+ err = rc;
+ dev_err(rvu->dev, "Failed to disable %s:%d context\n",
+ (req->ctype == NPA_AQ_CTYPE_AURA) ?
+ "Aura" : "Pool", id);
+ }
+ }
+
+ return err;
+}
+
+int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu,
+ struct npa_aq_enq_req *req,
+ struct npa_aq_enq_rsp *rsp)
+{
+ return rvu_npa_aq_enq_inst(rvu, req, rsp);
+}
+
+int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu,
+ struct hwctx_disable_req *req,
+ struct msg_rsp *rsp)
+{
+ return npa_lf_hwctx_disable(rvu, req);
+}
+
+static void npa_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf)
+{
+ kfree(pfvf->aura_bmap);
+ pfvf->aura_bmap = NULL;
+
+ qmem_free(rvu->dev, pfvf->aura_ctx);
+ pfvf->aura_ctx = NULL;
+
+ kfree(pfvf->pool_bmap);
+ pfvf->pool_bmap = NULL;
+
+ qmem_free(rvu->dev, pfvf->pool_ctx);
+ pfvf->pool_ctx = NULL;
+
+ qmem_free(rvu->dev, pfvf->npa_qints_ctx);
+ pfvf->npa_qints_ctx = NULL;
+}
+
+int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu,
+ struct npa_lf_alloc_req *req,
+ struct npa_lf_alloc_rsp *rsp)
+{
+ int npalf, qints, hwctx_size, err, rc = 0;
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ struct rvu_block *block;
+ struct rvu_pfvf *pfvf;
+ u64 cfg, ctx_cfg;
+ int blkaddr;
+
+ if (req->aura_sz > NPA_AURA_SZ_MAX ||
+ req->aura_sz == NPA_AURA_SZ_0 || !req->nr_pools)
+ return NPA_AF_ERR_PARAM;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+ if (!pfvf->npalf || blkaddr < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ npalf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (npalf < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ /* Reset this NPA LF */
+ err = rvu_lf_reset(rvu, block, npalf);
+ if (err) {
+ dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf);
+ return NPA_AF_ERR_LF_RESET;
+ }
+
+ ctx_cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST1);
+
+ /* Alloc memory for aura HW contexts */
+ hwctx_size = 1UL << (ctx_cfg & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->aura_ctx,
+ NPA_AURA_COUNT(req->aura_sz), hwctx_size);
+ if (err)
+ goto free_mem;
+
+ pfvf->aura_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long),
+ GFP_KERNEL);
+ if (!pfvf->aura_bmap)
+ goto free_mem;
+
+ /* Alloc memory for pool HW contexts */
+ hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->pool_ctx, req->nr_pools, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ pfvf->pool_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long),
+ GFP_KERNEL);
+ if (!pfvf->pool_bmap)
+ goto free_mem;
+
+ /* Get no of queue interrupts supported */
+ cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST);
+ qints = (cfg >> 28) & 0xFFF;
+
+ /* Alloc memory for Qints HW contexts */
+ hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF);
+ err = qmem_alloc(rvu->dev, &pfvf->npa_qints_ctx, qints, hwctx_size);
+ if (err)
+ goto free_mem;
+
+ cfg = rvu_read64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf));
+ /* Clear way partition mask and set aura offset to '0' */
+ cfg &= ~(BIT_ULL(34) - 1);
+ /* Set aura size & enable caching of contexts */
+ cfg |= (req->aura_sz << 16) | BIT_ULL(34);
+ rvu_write64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf), cfg);
+
+ /* Configure aura HW context's base */
+ rvu_write64(rvu, blkaddr, NPA_AF_LFX_LOC_AURAS_BASE(npalf),
+ (u64)pfvf->aura_ctx->iova);
+
+ /* Enable caching of qints hw context */
+ rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_CFG(npalf), BIT_ULL(36));
+ rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_BASE(npalf),
+ (u64)pfvf->npa_qints_ctx->iova);
+
+ goto exit;
+
+free_mem:
+ npa_ctx_free(rvu, pfvf);
+ rc = -ENOMEM;
+
+exit:
+ /* set stack page info */
+ cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST);
+ rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF;
+ rsp->stack_pg_bytes = cfg & 0xFF;
+ rsp->qints = (cfg >> 28) & 0xFFF;
+ return rc;
+}
+
+int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req,
+ struct msg_rsp *rsp)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u16 pcifunc = req->hdr.pcifunc;
+ struct rvu_block *block;
+ struct rvu_pfvf *pfvf;
+ int npalf, err;
+ int blkaddr;
+
+ pfvf = rvu_get_pfvf(rvu, pcifunc);
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc);
+ if (!pfvf->npalf || blkaddr < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ block = &hw->block[blkaddr];
+ npalf = rvu_get_lf(rvu, block, pcifunc, 0);
+ if (npalf < 0)
+ return NPA_AF_ERR_AF_LF_INVALID;
+
+ /* Reset this NPA LF */
+ err = rvu_lf_reset(rvu, block, npalf);
+ if (err) {
+ dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf);
+ return NPA_AF_ERR_LF_RESET;
+ }
+
+ npa_ctx_free(rvu, pfvf);
+
+ return 0;
+}
+
+static int npa_aq_init(struct rvu *rvu, struct rvu_block *block)
+{
+ u64 cfg;
+ int err;
+
+ /* Set admin queue endianness */
+ cfg = rvu_read64(rvu, block->addr, NPA_AF_GEN_CFG);
+#ifdef __BIG_ENDIAN
+ cfg |= BIT_ULL(1);
+ rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg);
+#else
+ cfg &= ~BIT_ULL(1);
+ rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg);
+#endif
+
+ /* Do not bypass NDC cache */
+ cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG);
+ cfg &= ~0x03DULL;
+ rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg);
+
+ /* Result structure can be followed by Aura/Pool context at
+ * RES + 128bytes and a write mask at RES + 256 bytes, depending on
+ * operation type. Alloc sufficient result memory for all operations.
+ */
+ err = rvu_aq_alloc(rvu, &block->aq,
+ Q_COUNT(AQ_SIZE), sizeof(struct npa_aq_inst_s),
+ ALIGN(sizeof(struct npa_aq_res_s), 128) + 256);
+ if (err)
+ return err;
+
+ rvu_write64(rvu, block->addr, NPA_AF_AQ_CFG, AQ_SIZE);
+ rvu_write64(rvu, block->addr,
+ NPA_AF_AQ_BASE, (u64)block->aq->inst->iova);
+ return 0;
+}
+
+int rvu_npa_init(struct rvu *rvu)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct rvu_block *block;
+ int blkaddr, err;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
+ if (blkaddr < 0)
+ return 0;
+
+ block = &hw->block[blkaddr];
+
+ /* Initialize admin queue */
+ err = npa_aq_init(rvu, &hw->block[blkaddr]);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void rvu_npa_freemem(struct rvu *rvu)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ struct rvu_block *block;
+ int blkaddr;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0);
+ if (blkaddr < 0)
+ return;
+
+ block = &hw->block[blkaddr];
+ rvu_aq_free(rvu, block->aq);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index 92e05817ffe7..c331b237a26f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -71,4 +71,812 @@ enum rvu_pf_int_vec_e {
RVU_PF_INT_VEC_CNT = 0x7,
};
+/* NPA admin queue completion enumeration */
+enum npa_aq_comp {
+ NPA_AQ_COMP_NOTDONE = 0x0,
+ NPA_AQ_COMP_GOOD = 0x1,
+ NPA_AQ_COMP_SWERR = 0x2,
+ NPA_AQ_COMP_CTX_POISON = 0x3,
+ NPA_AQ_COMP_CTX_FAULT = 0x4,
+ NPA_AQ_COMP_LOCKERR = 0x5,
+};
+
+/* NPA admin queue context types */
+enum npa_aq_ctype {
+ NPA_AQ_CTYPE_AURA = 0x0,
+ NPA_AQ_CTYPE_POOL = 0x1,
+};
+
+/* NPA admin queue instruction opcodes */
+enum npa_aq_instop {
+ NPA_AQ_INSTOP_NOP = 0x0,
+ NPA_AQ_INSTOP_INIT = 0x1,
+ NPA_AQ_INSTOP_WRITE = 0x2,
+ NPA_AQ_INSTOP_READ = 0x3,
+ NPA_AQ_INSTOP_LOCK = 0x4,
+ NPA_AQ_INSTOP_UNLOCK = 0x5,
+};
+
+/* NPA admin queue instruction structure */
+struct npa_aq_inst_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 doneint : 1; /* W0 */
+ u64 reserved_44_62 : 19;
+ u64 cindex : 20;
+ u64 reserved_17_23 : 7;
+ u64 lf : 9;
+ u64 ctype : 4;
+ u64 op : 4;
+#else
+ u64 op : 4;
+ u64 ctype : 4;
+ u64 lf : 9;
+ u64 reserved_17_23 : 7;
+ u64 cindex : 20;
+ u64 reserved_44_62 : 19;
+ u64 doneint : 1;
+#endif
+ u64 res_addr; /* W1 */
+};
+
+/* NPA admin queue result structure */
+struct npa_aq_res_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_17_63 : 47; /* W0 */
+ u64 doneint : 1;
+ u64 compcode : 8;
+ u64 ctype : 4;
+ u64 op : 4;
+#else
+ u64 op : 4;
+ u64 ctype : 4;
+ u64 compcode : 8;
+ u64 doneint : 1;
+ u64 reserved_17_63 : 47;
+#endif
+ u64 reserved_64_127; /* W1 */
+};
+
+struct npa_aura_s {
+ u64 pool_addr; /* W0 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */
+ u64 avg_level : 8;
+ u64 reserved_118_119 : 2;
+ u64 shift : 6;
+ u64 aura_drop : 8;
+ u64 reserved_98_103 : 6;
+ u64 bp_ena : 2;
+ u64 aura_drop_ena : 1;
+ u64 pool_drop_ena : 1;
+ u64 reserved_93 : 1;
+ u64 avg_con : 9;
+ u64 pool_way_mask : 16;
+ u64 pool_caching : 1;
+ u64 reserved_65 : 2;
+ u64 ena : 1;
+#else
+ u64 ena : 1;
+ u64 reserved_65 : 2;
+ u64 pool_caching : 1;
+ u64 pool_way_mask : 16;
+ u64 avg_con : 9;
+ u64 reserved_93 : 1;
+ u64 pool_drop_ena : 1;
+ u64 aura_drop_ena : 1;
+ u64 bp_ena : 2;
+ u64 reserved_98_103 : 6;
+ u64 aura_drop : 8;
+ u64 shift : 6;
+ u64 reserved_118_119 : 2;
+ u64 avg_level : 8;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */
+ u64 reserved_189_191 : 3;
+ u64 nix1_bpid : 9;
+ u64 reserved_177_179 : 3;
+ u64 nix0_bpid : 9;
+ u64 reserved_164_167 : 4;
+ u64 count : 36;
+#else
+ u64 count : 36;
+ u64 reserved_164_167 : 4;
+ u64 nix0_bpid : 9;
+ u64 reserved_177_179 : 3;
+ u64 nix1_bpid : 9;
+ u64 reserved_189_191 : 3;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */
+ u64 reserved_252_255 : 4;
+ u64 fc_hyst_bits : 4;
+ u64 fc_stype : 2;
+ u64 fc_up_crossing : 1;
+ u64 fc_ena : 1;
+ u64 reserved_240_243 : 4;
+ u64 bp : 8;
+ u64 reserved_228_231 : 4;
+ u64 limit : 36;
+#else
+ u64 limit : 36;
+ u64 reserved_228_231 : 4;
+ u64 bp : 8;
+ u64 reserved_240_243 : 4;
+ u64 fc_ena : 1;
+ u64 fc_up_crossing : 1;
+ u64 fc_stype : 2;
+ u64 fc_hyst_bits : 4;
+ u64 reserved_252_255 : 4;
+#endif
+ u64 fc_addr; /* W4 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */
+ u64 reserved_379_383 : 5;
+ u64 err_qint_idx : 7;
+ u64 reserved_371 : 1;
+ u64 thresh_qint_idx : 7;
+ u64 reserved_363 : 1;
+ u64 thresh_up : 1;
+ u64 thresh_int_ena : 1;
+ u64 thresh_int : 1;
+ u64 err_int_ena : 8;
+ u64 err_int : 8;
+ u64 update_time : 16;
+ u64 pool_drop : 8;
+#else
+ u64 pool_drop : 8;
+ u64 update_time : 16;
+ u64 err_int : 8;
+ u64 err_int_ena : 8;
+ u64 thresh_int : 1;
+ u64 thresh_int_ena : 1;
+ u64 thresh_up : 1;
+ u64 reserved_363 : 1;
+ u64 thresh_qint_idx : 7;
+ u64 reserved_371 : 1;
+ u64 err_qint_idx : 7;
+ u64 reserved_379_383 : 5;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */
+ u64 reserved_420_447 : 28;
+ u64 thresh : 36;
+#else
+ u64 thresh : 36;
+ u64 reserved_420_447 : 28;
+#endif
+ u64 reserved_448_511; /* W7 */
+};
+
+struct npa_pool_s {
+ u64 stack_base; /* W0 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */
+ u64 reserved_115_127 : 13;
+ u64 buf_size : 11;
+ u64 reserved_100_103 : 4;
+ u64 buf_offset : 12;
+ u64 stack_way_mask : 16;
+ u64 reserved_70_71 : 3;
+ u64 stack_caching : 1;
+ u64 reserved_66_67 : 2;
+ u64 nat_align : 1;
+ u64 ena : 1;
+#else
+ u64 ena : 1;
+ u64 nat_align : 1;
+ u64 reserved_66_67 : 2;
+ u64 stack_caching : 1;
+ u64 reserved_70_71 : 3;
+ u64 stack_way_mask : 16;
+ u64 buf_offset : 12;
+ u64 reserved_100_103 : 4;
+ u64 buf_size : 11;
+ u64 reserved_115_127 : 13;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */
+ u64 stack_pages : 32;
+ u64 stack_max_pages : 32;
+#else
+ u64 stack_max_pages : 32;
+ u64 stack_pages : 32;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */
+ u64 reserved_240_255 : 16;
+ u64 op_pc : 48;
+#else
+ u64 op_pc : 48;
+ u64 reserved_240_255 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */
+ u64 reserved_316_319 : 4;
+ u64 update_time : 16;
+ u64 reserved_297_299 : 3;
+ u64 fc_up_crossing : 1;
+ u64 fc_hyst_bits : 4;
+ u64 fc_stype : 2;
+ u64 fc_ena : 1;
+ u64 avg_con : 9;
+ u64 avg_level : 8;
+ u64 reserved_270_271 : 2;
+ u64 shift : 6;
+ u64 reserved_260_263 : 4;
+ u64 stack_offset : 4;
+#else
+ u64 stack_offset : 4;
+ u64 reserved_260_263 : 4;
+ u64 shift : 6;
+ u64 reserved_270_271 : 2;
+ u64 avg_level : 8;
+ u64 avg_con : 9;
+ u64 fc_ena : 1;
+ u64 fc_stype : 2;
+ u64 fc_hyst_bits : 4;
+ u64 fc_up_crossing : 1;
+ u64 reserved_297_299 : 3;
+ u64 update_time : 16;
+ u64 reserved_316_319 : 4;
+#endif
+ u64 fc_addr; /* W5 */
+ u64 ptr_start; /* W6 */
+ u64 ptr_end; /* W7 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */
+ u64 reserved_571_575 : 5;
+ u64 err_qint_idx : 7;
+ u64 reserved_563 : 1;
+ u64 thresh_qint_idx : 7;
+ u64 reserved_555 : 1;
+ u64 thresh_up : 1;
+ u64 thresh_int_ena : 1;
+ u64 thresh_int : 1;
+ u64 err_int_ena : 8;
+ u64 err_int : 8;
+ u64 reserved_512_535 : 24;
+#else
+ u64 reserved_512_535 : 24;
+ u64 err_int : 8;
+ u64 err_int_ena : 8;
+ u64 thresh_int : 1;
+ u64 thresh_int_ena : 1;
+ u64 thresh_up : 1;
+ u64 reserved_555 : 1;
+ u64 thresh_qint_idx : 7;
+ u64 reserved_563 : 1;
+ u64 err_qint_idx : 7;
+ u64 reserved_571_575 : 5;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */
+ u64 reserved_612_639 : 28;
+ u64 thresh : 36;
+#else
+ u64 thresh : 36;
+ u64 reserved_612_639 : 28;
+#endif
+ u64 reserved_640_703; /* W10 */
+ u64 reserved_704_767; /* W11 */
+ u64 reserved_768_831; /* W12 */
+ u64 reserved_832_895; /* W13 */
+ u64 reserved_896_959; /* W14 */
+ u64 reserved_960_1023; /* W15 */
+};
+
+/* NIX admin queue completion status */
+enum nix_aq_comp {
+ NIX_AQ_COMP_NOTDONE = 0x0,
+ NIX_AQ_COMP_GOOD = 0x1,
+ NIX_AQ_COMP_SWERR = 0x2,
+ NIX_AQ_COMP_CTX_POISON = 0x3,
+ NIX_AQ_COMP_CTX_FAULT = 0x4,
+ NIX_AQ_COMP_LOCKERR = 0x5,
+ NIX_AQ_COMP_SQB_ALLOC_FAIL = 0x6,
+};
+
+/* NIX admin queue context types */
+enum nix_aq_ctype {
+ NIX_AQ_CTYPE_RQ = 0x0,
+ NIX_AQ_CTYPE_SQ = 0x1,
+ NIX_AQ_CTYPE_CQ = 0x2,
+ NIX_AQ_CTYPE_MCE = 0x3,
+ NIX_AQ_CTYPE_RSS = 0x4,
+ NIX_AQ_CTYPE_DYNO = 0x5,
+};
+
+/* NIX admin queue instruction opcodes */
+enum nix_aq_instop {
+ NIX_AQ_INSTOP_NOP = 0x0,
+ NIX_AQ_INSTOP_INIT = 0x1,
+ NIX_AQ_INSTOP_WRITE = 0x2,
+ NIX_AQ_INSTOP_READ = 0x3,
+ NIX_AQ_INSTOP_LOCK = 0x4,
+ NIX_AQ_INSTOP_UNLOCK = 0x5,
+};
+
+/* NIX admin queue instruction structure */
+struct nix_aq_inst_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 doneint : 1; /* W0 */
+ u64 reserved_44_62 : 19;
+ u64 cindex : 20;
+ u64 reserved_15_23 : 9;
+ u64 lf : 7;
+ u64 ctype : 4;
+ u64 op : 4;
+#else
+ u64 op : 4;
+ u64 ctype : 4;
+ u64 lf : 7;
+ u64 reserved_15_23 : 9;
+ u64 cindex : 20;
+ u64 reserved_44_62 : 19;
+ u64 doneint : 1;
+#endif
+ u64 res_addr; /* W1 */
+};
+
+/* NIX admin queue result structure */
+struct nix_aq_res_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 reserved_17_63 : 47; /* W0 */
+ u64 doneint : 1;
+ u64 compcode : 8;
+ u64 ctype : 4;
+ u64 op : 4;
+#else
+ u64 op : 4;
+ u64 ctype : 4;
+ u64 compcode : 8;
+ u64 doneint : 1;
+ u64 reserved_17_63 : 47;
+#endif
+ u64 reserved_64_127; /* W1 */
+};
+
+/* NIX Completion queue context structure */
+struct nix_cq_ctx_s {
+ u64 base;
+#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */
+ u64 wrptr : 20;
+ u64 avg_con : 9;
+ u64 cint_idx : 7;
+ u64 cq_err : 1;
+ u64 qint_idx : 7;
+ u64 rsvd_81_83 : 3;
+ u64 bpid : 9;
+ u64 rsvd_69_71 : 3;
+ u64 bp_ena : 1;
+ u64 rsvd_64_67 : 4;
+#else
+ u64 rsvd_64_67 : 4;
+ u64 bp_ena : 1;
+ u64 rsvd_69_71 : 3;
+ u64 bpid : 9;
+ u64 rsvd_81_83 : 3;
+ u64 qint_idx : 7;
+ u64 cq_err : 1;
+ u64 cint_idx : 7;
+ u64 avg_con : 9;
+ u64 wrptr : 20;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */
+ u64 update_time : 16;
+ u64 avg_level : 8;
+ u64 head : 20;
+ u64 tail : 20;
+#else
+ u64 tail : 20;
+ u64 head : 20;
+ u64 avg_level : 8;
+ u64 update_time : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */
+ u64 cq_err_int_ena : 8;
+ u64 cq_err_int : 8;
+ u64 qsize : 4;
+ u64 rsvd_233_235 : 3;
+ u64 caching : 1;
+ u64 substream : 20;
+ u64 rsvd_210_211 : 2;
+ u64 ena : 1;
+ u64 drop_ena : 1;
+ u64 drop : 8;
+ u64 dp : 8;
+#else
+ u64 dp : 8;
+ u64 drop : 8;
+ u64 drop_ena : 1;
+ u64 ena : 1;
+ u64 rsvd_210_211 : 2;
+ u64 substream : 20;
+ u64 caching : 1;
+ u64 rsvd_233_235 : 3;
+ u64 qsize : 4;
+ u64 cq_err_int : 8;
+ u64 cq_err_int_ena : 8;
+#endif
+};
+
+/* NIX Receive queue context structure */
+struct nix_rq_ctx_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */
+ u64 wqe_aura : 20;
+ u64 substream : 20;
+ u64 cq : 20;
+ u64 ena_wqwd : 1;
+ u64 ipsech_ena : 1;
+ u64 sso_ena : 1;
+ u64 ena : 1;
+#else
+ u64 ena : 1;
+ u64 sso_ena : 1;
+ u64 ipsech_ena : 1;
+ u64 ena_wqwd : 1;
+ u64 cq : 20;
+ u64 substream : 20;
+ u64 wqe_aura : 20;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */
+ u64 rsvd_127_122 : 6;
+ u64 lpb_drop_ena : 1;
+ u64 spb_drop_ena : 1;
+ u64 xqe_drop_ena : 1;
+ u64 wqe_caching : 1;
+ u64 pb_caching : 2;
+ u64 sso_tt : 2;
+ u64 sso_grp : 10;
+ u64 lpb_aura : 20;
+ u64 spb_aura : 20;
+#else
+ u64 spb_aura : 20;
+ u64 lpb_aura : 20;
+ u64 sso_grp : 10;
+ u64 sso_tt : 2;
+ u64 pb_caching : 2;
+ u64 wqe_caching : 1;
+ u64 xqe_drop_ena : 1;
+ u64 spb_drop_ena : 1;
+ u64 lpb_drop_ena : 1;
+ u64 rsvd_127_122 : 6;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */
+ u64 xqe_hdr_split : 1;
+ u64 xqe_imm_copy : 1;
+ u64 rsvd_189_184 : 6;
+ u64 xqe_imm_size : 6;
+ u64 later_skip : 6;
+ u64 rsvd_171 : 1;
+ u64 first_skip : 7;
+ u64 lpb_sizem1 : 12;
+ u64 spb_ena : 1;
+ u64 rsvd_150_148 : 3;
+ u64 wqe_skip : 2;
+ u64 spb_sizem1 : 6;
+ u64 rsvd_139_128 : 12;
+#else
+ u64 rsvd_139_128 : 12;
+ u64 spb_sizem1 : 6;
+ u64 wqe_skip : 2;
+ u64 rsvd_150_148 : 3;
+ u64 spb_ena : 1;
+ u64 lpb_sizem1 : 12;
+ u64 first_skip : 7;
+ u64 rsvd_171 : 1;
+ u64 later_skip : 6;
+ u64 xqe_imm_size : 6;
+ u64 rsvd_189_184 : 6;
+ u64 xqe_imm_copy : 1;
+ u64 xqe_hdr_split : 1;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */
+ u64 spb_pool_pass : 8;
+ u64 spb_pool_drop : 8;
+ u64 spb_aura_pass : 8;
+ u64 spb_aura_drop : 8;
+ u64 wqe_pool_pass : 8;
+ u64 wqe_pool_drop : 8;
+ u64 xqe_pass : 8;
+ u64 xqe_drop : 8;
+#else
+ u64 xqe_drop : 8;
+ u64 xqe_pass : 8;
+ u64 wqe_pool_drop : 8;
+ u64 wqe_pool_pass : 8;
+ u64 spb_aura_drop : 8;
+ u64 spb_aura_pass : 8;
+ u64 spb_pool_drop : 8;
+ u64 spb_pool_pass : 8;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */
+ u64 rsvd_319_315 : 5;
+ u64 qint_idx : 7;
+ u64 rq_int_ena : 8;
+ u64 rq_int : 8;
+ u64 rsvd_291_288 : 4;
+ u64 lpb_pool_pass : 8;
+ u64 lpb_pool_drop : 8;
+ u64 lpb_aura_pass : 8;
+ u64 lpb_aura_drop : 8;
+#else
+ u64 lpb_aura_drop : 8;
+ u64 lpb_aura_pass : 8;
+ u64 lpb_pool_drop : 8;
+ u64 lpb_pool_pass : 8;
+ u64 rsvd_291_288 : 4;
+ u64 rq_int : 8;
+ u64 rq_int_ena : 8;
+ u64 qint_idx : 7;
+ u64 rsvd_319_315 : 5;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */
+ u64 rsvd_383_366 : 18;
+ u64 flow_tagw : 6;
+ u64 bad_utag : 8;
+ u64 good_utag : 8;
+ u64 ltag : 24;
+#else
+ u64 ltag : 24;
+ u64 good_utag : 8;
+ u64 bad_utag : 8;
+ u64 flow_tagw : 6;
+ u64 rsvd_383_366 : 18;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */
+ u64 rsvd_447_432 : 16;
+ u64 octs : 48;
+#else
+ u64 octs : 48;
+ u64 rsvd_447_432 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W7 */
+ u64 rsvd_511_496 : 16;
+ u64 pkts : 48;
+#else
+ u64 pkts : 48;
+ u64 rsvd_511_496 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */
+ u64 rsvd_575_560 : 16;
+ u64 drop_octs : 48;
+#else
+ u64 drop_octs : 48;
+ u64 rsvd_575_560 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */
+ u64 rsvd_639_624 : 16;
+ u64 drop_pkts : 48;
+#else
+ u64 drop_pkts : 48;
+ u64 rsvd_639_624 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */
+ u64 rsvd_703_688 : 16;
+ u64 re_pkts : 48;
+#else
+ u64 re_pkts : 48;
+ u64 rsvd_703_688 : 16;
+#endif
+ u64 rsvd_767_704; /* W11 */
+ u64 rsvd_831_768; /* W12 */
+ u64 rsvd_895_832; /* W13 */
+ u64 rsvd_959_896; /* W14 */
+ u64 rsvd_1023_960; /* W15 */
+};
+
+/* NIX sqe sizes */
+enum nix_maxsqesz {
+ NIX_MAXSQESZ_W16 = 0x0,
+ NIX_MAXSQESZ_W8 = 0x1,
+};
+
+/* NIX SQB caching type */
+enum nix_stype {
+ NIX_STYPE_STF = 0x0,
+ NIX_STYPE_STT = 0x1,
+ NIX_STYPE_STP = 0x2,
+};
+
+/* NIX Send queue context structure */
+struct nix_sq_ctx_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */
+ u64 sqe_way_mask : 16;
+ u64 cq : 20;
+ u64 sdp_mcast : 1;
+ u64 substream : 20;
+ u64 qint_idx : 6;
+ u64 ena : 1;
+#else
+ u64 ena : 1;
+ u64 qint_idx : 6;
+ u64 substream : 20;
+ u64 sdp_mcast : 1;
+ u64 cq : 20;
+ u64 sqe_way_mask : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */
+ u64 sqb_count : 16;
+ u64 default_chan : 12;
+ u64 smq_rr_quantum : 24;
+ u64 sso_ena : 1;
+ u64 xoff : 1;
+ u64 cq_ena : 1;
+ u64 smq : 9;
+#else
+ u64 smq : 9;
+ u64 cq_ena : 1;
+ u64 xoff : 1;
+ u64 sso_ena : 1;
+ u64 smq_rr_quantum : 24;
+ u64 default_chan : 12;
+ u64 sqb_count : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */
+ u64 rsvd_191 : 1;
+ u64 sqe_stype : 2;
+ u64 sq_int_ena : 8;
+ u64 sq_int : 8;
+ u64 sqb_aura : 20;
+ u64 smq_rr_count : 25;
+#else
+ u64 smq_rr_count : 25;
+ u64 sqb_aura : 20;
+ u64 sq_int : 8;
+ u64 sq_int_ena : 8;
+ u64 sqe_stype : 2;
+ u64 rsvd_191 : 1;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */
+ u64 rsvd_255_253 : 3;
+ u64 smq_next_sq_vld : 1;
+ u64 smq_pend : 1;
+ u64 smenq_next_sqb_vld : 1;
+ u64 head_offset : 6;
+ u64 smenq_offset : 6;
+ u64 tail_offset : 6;
+ u64 smq_lso_segnum : 8;
+ u64 smq_next_sq : 20;
+ u64 mnq_dis : 1;
+ u64 lmt_dis : 1;
+ u64 cq_limit : 8;
+ u64 max_sqe_size : 2;
+#else
+ u64 max_sqe_size : 2;
+ u64 cq_limit : 8;
+ u64 lmt_dis : 1;
+ u64 mnq_dis : 1;
+ u64 smq_next_sq : 20;
+ u64 smq_lso_segnum : 8;
+ u64 tail_offset : 6;
+ u64 smenq_offset : 6;
+ u64 head_offset : 6;
+ u64 smenq_next_sqb_vld : 1;
+ u64 smq_pend : 1;
+ u64 smq_next_sq_vld : 1;
+ u64 rsvd_255_253 : 3;
+#endif
+ u64 next_sqb : 64;/* W4 */
+ u64 tail_sqb : 64;/* W5 */
+ u64 smenq_sqb : 64;/* W6 */
+ u64 smenq_next_sqb : 64;/* W7 */
+ u64 head_sqb : 64;/* W8 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */
+ u64 rsvd_639_630 : 10;
+ u64 vfi_lso_vld : 1;
+ u64 vfi_lso_vlan1_ins_ena : 1;
+ u64 vfi_lso_vlan0_ins_ena : 1;
+ u64 vfi_lso_mps : 14;
+ u64 vfi_lso_sb : 8;
+ u64 vfi_lso_sizem1 : 3;
+ u64 vfi_lso_total : 18;
+ u64 rsvd_583_576 : 8;
+#else
+ u64 rsvd_583_576 : 8;
+ u64 vfi_lso_total : 18;
+ u64 vfi_lso_sizem1 : 3;
+ u64 vfi_lso_sb : 8;
+ u64 vfi_lso_mps : 14;
+ u64 vfi_lso_vlan0_ins_ena : 1;
+ u64 vfi_lso_vlan1_ins_ena : 1;
+ u64 vfi_lso_vld : 1;
+ u64 rsvd_639_630 : 10;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */
+ u64 rsvd_703_658 : 46;
+ u64 scm_lso_rem : 18;
+#else
+ u64 scm_lso_rem : 18;
+ u64 rsvd_703_658 : 46;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W11 */
+ u64 rsvd_767_752 : 16;
+ u64 octs : 48;
+#else
+ u64 octs : 48;
+ u64 rsvd_767_752 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W12 */
+ u64 rsvd_831_816 : 16;
+ u64 pkts : 48;
+#else
+ u64 pkts : 48;
+ u64 rsvd_831_816 : 16;
+#endif
+ u64 rsvd_895_832 : 64;/* W13 */
+#if defined(__BIG_ENDIAN_BITFIELD) /* W14 */
+ u64 rsvd_959_944 : 16;
+ u64 dropped_octs : 48;
+#else
+ u64 dropped_octs : 48;
+ u64 rsvd_959_944 : 16;
+#endif
+#if defined(__BIG_ENDIAN_BITFIELD) /* W15 */
+ u64 rsvd_1023_1008 : 16;
+ u64 dropped_pkts : 48;
+#else
+ u64 dropped_pkts : 48;
+ u64 rsvd_1023_1008 : 16;
+#endif
+};
+
+/* NIX Receive side scaling entry structure*/
+struct nix_rsse_s {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ uint32_t reserved_20_31 : 12;
+ uint32_t rq : 20;
+#else
+ uint32_t rq : 20;
+ uint32_t reserved_20_31 : 12;
+
+#endif
+};
+
+/* NIX receive multicast/mirror entry structure */
+struct nix_rx_mce_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */
+ uint64_t next : 16;
+ uint64_t pf_func : 16;
+ uint64_t rsvd_31_24 : 8;
+ uint64_t index : 20;
+ uint64_t eol : 1;
+ uint64_t rsvd_2 : 1;
+ uint64_t op : 2;
+#else
+ uint64_t op : 2;
+ uint64_t rsvd_2 : 1;
+ uint64_t eol : 1;
+ uint64_t index : 20;
+ uint64_t rsvd_31_24 : 8;
+ uint64_t pf_func : 16;
+ uint64_t next : 16;
+#endif
+};
+
+enum nix_lsoalg {
+ NIX_LSOALG_NOP,
+ NIX_LSOALG_ADD_SEGNUM,
+ NIX_LSOALG_ADD_PAYLEN,
+ NIX_LSOALG_ADD_OFFSET,
+ NIX_LSOALG_TCP_FLAGS,
+};
+
+enum nix_txlayer {
+ NIX_TXLAYER_OL3,
+ NIX_TXLAYER_OL4,
+ NIX_TXLAYER_IL3,
+ NIX_TXLAYER_IL4,
+};
+
+struct nix_lso_format {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u64 rsvd_19_63 : 45;
+ u64 alg : 3;
+ u64 rsvd_14_15 : 2;
+ u64 sizem1 : 2;
+ u64 rsvd_10_11 : 2;
+ u64 layer : 2;
+ u64 offset : 8;
+#else
+ u64 offset : 8;
+ u64 layer : 2;
+ u64 rsvd_10_11 : 2;
+ u64 sizem1 : 2;
+ u64 rsvd_14_15 : 2;
+ u64 alg : 3;
+ u64 rsvd_19_63 : 45;
+#endif
+};
+
#endif /* RVU_STRUCT_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a53736c26c0c..a5a0823e5ada 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -308,10 +308,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
- case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT:
case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_DEALLOC_MEMIC:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -426,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
- case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
case MLX5_CMD_OP_FPGA_CREATE_QP:
case MLX5_CMD_OP_FPGA_MODIFY_QP:
@@ -435,6 +436,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_ALLOC_MEMIC:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -599,8 +601,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
- MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
- MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+ MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT);
MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
@@ -617,6 +619,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
+ MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
default: return "unknown command opcode";
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index a4179122a279..4b85abb5c9f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cons_index = 0;
cq->arm_sn = 0;
cq->eq = eq;
+ cq->uid = MLX5_GET(create_cq_in, in, uid);
refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
if (!cq->comp)
@@ -144,6 +145,7 @@ err_cmd:
memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, din, uid, cq->uid);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}
@@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, in, uid, cq->uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
@@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+ MLX5_SET(modify_cq_in, in, uid, cq->uid);
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_core_modify_cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 0240aee9189e..d027ce00c8ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg,
{MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\
{MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\
{MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\
- {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\
{MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\
@@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule,
memcpy(__entry->destination,
&rule->dest_attr,
sizeof(__entry->destination));
- if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
- rule->dest_attr.counter)
+ if (rule->dest_attr.type &
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
__entry->counter_id =
- rule->dest_attr.counter->id;
+ rule->dest_attr.counter_id;
),
TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
__entry->rule, __entry->fte, __entry->index,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 24e3b564964f..023dc4bccd28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -235,3 +235,211 @@ out:
kfree(out);
return err;
}
+
+static u32 fec_supported_speeds[] = {
+ 10000,
+ 40000,
+ 25000,
+ 50000,
+ 56000,
+ 100000
+};
+
+#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds)
+
+/* get/set FEC admin field for a given speed */
+static int mlx5e_fec_admin_field(u32 *pplm,
+ u8 *fec_policy,
+ bool write,
+ u32 speed)
+{
+ switch (speed) {
+ case 10000:
+ case 40000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_10g_40g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_10g_40g, *fec_policy);
+ break;
+ case 25000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_25g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_25g, *fec_policy);
+ break;
+ case 50000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_50g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_50g, *fec_policy);
+ break;
+ case 56000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_56g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_56g, *fec_policy);
+ break;
+ case 100000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_100g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_100g, *fec_policy);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* returns FEC capabilities for a given speed */
+static int mlx5e_get_fec_cap_field(u32 *pplm,
+ u8 *fec_cap,
+ u32 speed)
+{
+ switch (speed) {
+ case 10000:
+ case 40000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_10g_40g);
+ break;
+ case 25000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_25g);
+ break;
+ case 50000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_50g);
+ break;
+ case 56000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_56g);
+ break;
+ case 100000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_100g);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps)
+{
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u32 current_fec_speed;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ err = mlx5e_port_linkspeed(dev, &current_fec_speed);
+ if (err)
+ return err;
+
+ return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed);
+}
+
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u8 *fec_configured_mode)
+{
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u32 link_speed;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active);
+
+ if (!fec_configured_mode)
+ return 0;
+
+ err = mlx5e_port_linkspeed(dev, &link_speed);
+ if (err)
+ return err;
+
+ return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed);
+}
+
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
+{
+ bool fec_mode_not_supp_in_speed = false;
+ u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u32 current_fec_speed;
+ u8 fec_caps = 0;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ err = mlx5e_port_linkspeed(dev, &current_fec_speed);
+ if (err)
+ return err;
+
+ memset(in, 0, sz);
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) {
+ mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]);
+ /* policy supported for link speed */
+ if (!!(fec_caps & fec_policy)) {
+ mlx5e_fec_admin_field(in, &fec_policy, 1,
+ fec_supported_speeds[i]);
+ } else {
+ if (fec_supported_speeds[i] == current_fec_speed)
+ return -EOPNOTSUPP;
+ mlx5e_fec_admin_field(in, &no_fec_policy, 1,
+ fec_supported_speeds[i]);
+ fec_mode_not_supp_in_speed = true;
+ }
+ }
+
+ if (fec_mode_not_supp_in_speed)
+ mlx5_core_dbg(dev,
+ "FEC policy 0x%x is not supported for some speeds",
+ fec_policy);
+
+ return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index f8cbd8194179..cd2160b8c9bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -45,4 +45,16 @@ int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+
+int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps);
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u8 *fec_configured_mode);
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy);
+
+enum {
+ MLX5E_FEC_NOFEC,
+ MLX5E_FEC_FIRECODE,
+ MLX5E_FEC_RS_528_514,
+};
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index db3278cc052b..3078491cc0d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
if (enable_uc_lb)
MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c61f6302cde2..3e770abfd802 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -547,6 +547,70 @@ static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
+static const u32 pplm_fec_2_ethtool[] = {
+ [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF,
+ [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER,
+ [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
+};
+
+static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
+{
+ int mode = 0;
+
+ if (!fec_mode)
+ return ETHTOOL_FEC_AUTO;
+
+ mode = find_first_bit(&fec_mode, size);
+
+ if (mode < ARRAY_SIZE(pplm_fec_2_ethtool))
+ return pplm_fec_2_ethtool[mode];
+
+ return 0;
+}
+
+/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */
+static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code)
+{
+ u32 offset;
+
+ offset = find_first_bit(&ethtool_fec_code, sizeof(u32));
+ offset -= ETHTOOL_FEC_OFF_BIT;
+ offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT;
+
+ return offset;
+}
+
+static int get_fec_supported_advertised(struct mlx5_core_dev *dev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u_long fec_caps = 0;
+ u32 active_fec = 0;
+ u32 offset;
+ u32 bitn;
+ int err;
+
+ err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps);
+ if (err)
+ return (err == -EOPNOTSUPP) ? 0 : err;
+
+ err = mlx5e_get_fec_mode(dev, &active_fec, NULL);
+ if (err)
+ return err;
+
+ for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) {
+ u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn];
+
+ offset = ethtool_fec2ethtool_caps(ethtool_bitmask);
+ __set_bit(offset, link_ksettings->link_modes.supported);
+ }
+
+ active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE);
+ offset = ethtool_fec2ethtool_caps(active_fec);
+ __set_bit(offset, link_ksettings->link_modes.advertising);
+
+ return 0;
+}
+
static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
u32 eth_proto_cap,
u8 connector_type)
@@ -742,7 +806,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
if (err) {
netdev_err(netdev, "%s: query port ptys failed: %d\n",
__func__, err);
- goto err_query_ptys;
+ goto err_query_regs;
}
eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
@@ -778,11 +842,17 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
AUTONEG_ENABLE;
ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
Autoneg);
+
+ err = get_fec_supported_advertised(mdev, link_ksettings);
+ if (err)
+ netdev_dbg(netdev, "%s: FEC caps query failed: %d\n",
+ __func__, err);
+
if (!an_disable_admin)
ethtool_link_ksettings_add_link_mode(link_ksettings,
advertising, Autoneg);
-err_query_ptys:
+err_query_regs:
return err;
}
@@ -1277,6 +1347,58 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
return mlx5_set_port_wol(mdev, mlx5_wol_mode);
}
+static int mlx5e_get_fecparam(struct net_device *netdev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 fec_configured = 0;
+ u32 fec_active = 0;
+ int err;
+
+ err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured);
+
+ if (err)
+ return err;
+
+ fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active,
+ sizeof(u32) * BITS_PER_BYTE);
+
+ if (!fecparam->active_fec)
+ return -EOPNOTSUPP;
+
+ fecparam->fec = pplm2ethtool_fec((u_long)fec_configured,
+ sizeof(u8) * BITS_PER_BYTE);
+
+ return 0;
+}
+
+static int mlx5e_set_fecparam(struct net_device *netdev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 fec_policy = 0;
+ int mode;
+ int err;
+
+ for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
+ if (!(pplm_fec_2_ethtool[mode] & fecparam->fec))
+ continue;
+ fec_policy |= (1 << mode);
+ break;
+ }
+
+ err = mlx5e_set_fec_mode(mdev, fec_policy);
+
+ if (err)
+ return err;
+
+ mlx5_toggle_port_link(mdev);
+
+ return 0;
+}
+
static u32 mlx5e_get_msglevel(struct net_device *dev)
{
return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
@@ -1699,4 +1821,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.self_test = mlx5e_self_test,
.get_msglevel = mlx5e_get_msglevel,
.set_msglevel = mlx5e_set_msglevel,
+ .get_fecparam = mlx5e_get_fecparam,
+ .set_fecparam = mlx5e_set_fecparam,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c9848e333450..1243edbedc9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3392,9 +3392,6 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
- return -EOPNOTSUPP;
-
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 64c2b9ea8b1e..c3c657548824 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -853,9 +853,6 @@ static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
- return -EOPNOTSUPP;
-
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
@@ -869,9 +866,6 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
- return -EOPNOTSUPP;
-
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index f19067c94272..2f7fb8de6967 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -433,10 +433,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
struct mlx5_wq_cyc *wq,
- u16 pi, u16 frag_pi)
+ u16 pi, u16 nnops)
{
struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
- u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
edge_wi = wi + nnops;
@@ -455,15 +454,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
- u16 pi, frag_pi;
+ u16 pi, contig_wqebbs_room;
int err;
int i;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
- if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
- mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+ mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 1c006869a642..1e55b9c27ffc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -614,46 +614,82 @@ static const struct counter_desc pport_phy_statistical_stats_desc[] = {
{ "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
};
-#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc)
+static const struct counter_desc
+pport_phy_statistical_err_lanes_stats_desc[] = {
+ { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) },
+ { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) },
+ { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) },
+ { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) },
+};
+
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \
+ ARRAY_SIZE(pport_phy_statistical_stats_desc)
+#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \
+ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc)
static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int num_stats;
+
/* "1" for link_down_events special counter */
- return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
- NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1;
+ num_stats = 1;
+
+ num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ?
+ NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0;
+
+ num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ?
+ NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0;
+
+ return num_stats;
}
static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
int idx)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int i;
strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
- if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
return idx;
for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
pport_phy_statistical_stats_desc[i].format);
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_phy_statistical_err_lanes_stats_desc[i].format);
+
return idx;
}
static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int i;
/* link_down_events_phy has special handling since it is not stored in __be64 format */
data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
counter_set.phys_layer_cntrs.link_down_events);
- if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
return idx;
for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
data[idx++] =
MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
pport_phy_statistical_stats_desc, i);
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+ pport_phy_statistical_err_lanes_stats_desc,
+ i);
return idx;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 6de21d9f4fad..608025ca5c04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr {
u32 hairpin_tirn;
u8 match_level;
struct mlx5_flow_table *hairpin_ft;
+ struct mlx5_fc *counter;
};
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
@@ -73,6 +74,7 @@ enum {
MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
+ MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
};
#define MLX5E_TC_MAX_SPLITS 1
@@ -81,7 +83,7 @@ struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
u64 cookie;
- u8 flags;
+ u16 flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
struct list_head encap; /* flows sharing the same encap ID */
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
@@ -100,11 +102,6 @@ struct mlx5e_tc_flow_parse_attr {
int mirred_ifindex;
};
-enum {
- MLX5_HEADER_TYPE_VXLAN = 0x0,
- MLX5_HEADER_TYPE_NVGRE = 0x1,
-};
-
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
@@ -677,7 +674,7 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
}
}
-static struct mlx5_flow_handle *
+static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
@@ -688,19 +685,17 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .has_flow_tag = true,
.flow_tag = attr->flow_tag,
- .encap_id = 0,
+ .reformat_id = 0,
+ .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
- struct mlx5_flow_handle *rule;
bool table_created = false;
int err, dest_ix = 0;
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err) {
- rule = ERR_PTR(err);
goto err_add_hairpin_flow;
}
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
@@ -720,22 +715,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
if (IS_ERR(counter)) {
- rule = ERR_CAST(counter);
+ err = PTR_ERR(counter);
goto err_fc_create;
}
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[dest_ix].counter = counter;
+ dest[dest_ix].counter_id = mlx5_fc_id(counter);
dest_ix++;
+ attr->counter = counter;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
flow_act.modify_id = attr->mod_hdr_id;
kfree(parse_attr->mod_hdr_actions);
- if (err) {
- rule = ERR_PTR(err);
+ if (err)
goto err_create_mod_hdr_id;
- }
}
if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
@@ -761,7 +755,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
"Failed to create tc offload table\n");
netdev_err(priv->netdev,
"Failed to create tc offload table\n");
- rule = ERR_CAST(priv->fs.tc.t);
+ err = PTR_ERR(priv->fs.tc.t);
goto err_create_ft;
}
@@ -771,13 +765,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->match_level != MLX5_MATCH_NONE)
parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
- &flow_act, dest, dest_ix);
+ flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
+ &flow_act, dest, dest_ix);
- if (IS_ERR(rule))
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
goto err_add_rule;
+ }
- return rule;
+ return 0;
err_add_rule:
if (table_created) {
@@ -793,7 +789,7 @@ err_fc_create:
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
mlx5e_hairpin_flow_del(priv, flow);
err_add_hairpin_flow:
- return rule;
+ return err;
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
@@ -802,7 +798,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_fc *counter = NULL;
- counter = mlx5_flow_rule_counter(flow->rule[0]);
+ counter = attr->counter;
mlx5_del_flow_rules(flow->rule[0]);
mlx5_fc_destroy(priv->mdev, counter);
@@ -829,28 +825,115 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct netlink_ext_ack *extack);
static struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_flow_handle *rule;
+
+ rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ if (IS_ERR(rule))
+ return rule;
+
+ if (attr->mirror_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
+ if (IS_ERR(flow->rule[1])) {
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+ return flow->rule[1];
+ }
+ }
+
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ return rule;
+}
+
+static void
+mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_esw_flow_attr *attr)
+{
+ flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+
+ if (attr->mirror_count)
+ mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *slow_attr)
+{
+ struct mlx5_flow_handle *rule;
+
+ memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ slow_attr->mirror_count = 0,
+ slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN,
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
+ if (!IS_ERR(rule))
+ flow->flags |= MLX5E_TC_FLOW_SLOW;
+
+ return rule;
+}
+
+static void
+mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_esw_flow_attr *slow_attr)
+{
+ memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
+ flow->flags &= ~MLX5E_TC_FLOW_SLOW;
+}
+
+static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u32 max_chain = mlx5_eswitch_get_chain_range(esw);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ u16 max_prio = mlx5_eswitch_get_prio_range(esw);
struct net_device *out_dev, *encap_dev = NULL;
- struct mlx5_flow_handle *rule = NULL;
+ struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
- int err;
+ int err = 0, encap_err = 0;
+
+ /* if prios are not supported, keep the old behaviour of using same prio
+ * for all offloaded rules.
+ */
+ if (!mlx5_eswitch_prios_supported(esw))
+ attr->prio = 1;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ if (attr->chain > max_chain) {
+ NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_max_prio_chain;
+ }
+
+ if (attr->prio > max_prio) {
+ NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_max_prio_chain;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
out_dev = __dev_get_by_index(dev_net(priv->netdev),
attr->parse_attr->mirred_ifindex);
- err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
- out_dev, &encap_dev, flow, extack);
- if (err) {
- rule = ERR_PTR(err);
- if (err != -EAGAIN)
- goto err_attach_encap;
+ encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
+ out_dev, &encap_dev, flow,
+ extack);
+ if (encap_err && encap_err != -EAGAIN) {
+ err = encap_err;
+ goto err_attach_encap;
}
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
@@ -859,49 +942,58 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
- if (err) {
- rule = ERR_PTR(err);
+ if (err)
goto err_add_vlan;
- }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
kfree(parse_attr->mod_hdr_actions);
- if (err) {
- rule = ERR_PTR(err);
+ if (err)
goto err_mod_hdr;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_create_counter;
}
+
+ attr->counter = counter;
}
- /* we get here if (1) there's no error (rule being null) or when
+ /* we get here if (1) there's no error or when
* (2) there's an encap action and we're on -EAGAIN (no valid neigh)
*/
- if (rule != ERR_PTR(-EAGAIN)) {
- rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
- if (IS_ERR(rule))
- goto err_add_rule;
-
- if (attr->mirror_count) {
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
- if (IS_ERR(flow->rule[1]))
- goto err_fwd_rule;
- }
+ if (encap_err == -EAGAIN) {
+ /* continue with goto slow path rule instead */
+ struct mlx5_esw_flow_attr slow_attr;
+
+ flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
+ } else {
+ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
}
- return rule;
-err_fwd_rule:
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
- rule = flow->rule[1];
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ goto err_add_rule;
+ }
+
+ return 0;
+
err_add_rule:
+ mlx5_fc_destroy(esw->dev, counter);
+err_create_counter:
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
mlx5e_detach_encap(priv, flow);
err_attach_encap:
- return rule;
+err_max_prio_chain:
+ return err;
}
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
@@ -909,36 +1001,43 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5_esw_flow_attr slow_attr;
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ if (flow->flags & MLX5E_TC_FLOW_SLOW)
+ mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
}
mlx5_eswitch_del_vlan_action(esw, attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
mlx5e_detach_encap(priv, flow);
kvfree(attr->parse_attr);
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(esw->dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_esw_flow_attr slow_attr, *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
struct mlx5e_tc_flow *flow;
int err;
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
- e->encap_size, e->encap_header,
- &e->encap_id);
+ err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ e->encap_size, e->encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
err);
@@ -950,26 +1049,20 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
list_for_each_entry(flow, &e->flows, encap) {
esw_attr = flow->esw_attr;
esw_attr->encap_id = e->encap_id;
- flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
- if (IS_ERR(flow->rule[0])) {
- err = PTR_ERR(flow->rule[0]);
+ spec = &esw_attr->parse_attr->spec;
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
continue;
}
- if (esw_attr->mirror_count) {
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
- if (IS_ERR(flow->rule[1])) {
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
- err = PTR_ERR(flow->rule[1]);
- mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
- err);
- continue;
- }
- }
-
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */
+ flow->rule[0] = rule;
}
}
@@ -977,25 +1070,44 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr slow_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
struct mlx5e_tc_flow *flow;
+ int err;
list_for_each_entry(flow, &e->flows, encap) {
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ spec = &flow->esw_attr->parse_attr->spec;
+
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
}
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */
+ flow->rule[0] = rule;
}
if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
+ mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
}
}
+static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ return flow->esw_attr->counter;
+ else
+ return flow->nic_attr->counter;
+}
+
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
@@ -1021,7 +1133,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
continue;
list_for_each_entry(flow, &e->flows, encap) {
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- counter = mlx5_flow_rule_counter(flow->rule[0]);
+ counter = mlx5e_tc_get_counter(flow);
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
neigh_used = true;
@@ -1061,7 +1173,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
+ mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
hash_del_rcu(&e->encap_hlist);
kfree(e->encap_header);
@@ -2391,7 +2503,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
return -ENOMEM;
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
fl4.flowi4_proto = IPPROTO_UDP;
fl4.fl4_dport = tun_key->tp_dst;
break;
@@ -2435,7 +2547,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
read_unlock_bh(&n->lock);
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
gen_vxlan_header_ipv4(out_dev, encap_header,
ipv4_encap_size, e->h_dest, tos, ttl,
fl4.daddr,
@@ -2455,8 +2567,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
- ipv4_encap_size, encap_header, &e->encap_id);
+ err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
if (err)
goto destroy_neigh_entry;
@@ -2500,7 +2614,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
return -ENOMEM;
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
fl6.flowi6_proto = IPPROTO_UDP;
fl6.fl6_dport = tun_key->tp_dst;
break;
@@ -2544,7 +2658,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
read_unlock_bh(&n->lock);
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
gen_vxlan_header_ipv6(out_dev, encap_header,
ipv6_encap_size, e->h_dest, tos, ttl,
&fl6.daddr,
@@ -2565,8 +2679,10 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
- ipv6_encap_size, encap_header, &e->encap_id);
+ err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
if (err)
goto destroy_neigh_entry;
@@ -2617,7 +2733,7 @@ vxlan_encap_offload_err:
if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
- tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+ tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
} else {
NL_SET_ERR_MSG_MOD(extack,
"port isn't an offloaded vxlan udp dport");
@@ -2728,6 +2844,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct ip_tunnel_info *info = NULL;
@@ -2797,7 +2914,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
parse_attr->mirred_ifindex = out_dev->ifindex;
parse_attr->tun_info = *info;
attr->parse_attr = parse_attr;
- action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
/* attr->out_rep is resolved when we handle encap */
@@ -2836,6 +2953,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
continue;
}
+ if (is_tcf_gact_goto_chain(a)) {
+ u32 dest_chain = tcf_gact_goto_chain_index(a);
+ u32 max_chain = mlx5_eswitch_get_chain_range(esw);
+
+ if (dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = dest_chain;
+
+ continue;
+ }
+
return -EINVAL;
}
@@ -2853,9 +2989,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return 0;
}
-static void get_flags(int flags, u8 *flow_flags)
+static void get_flags(int flags, u16 *flow_flags)
{
- u8 __flow_flags = 0;
+ u16 __flow_flags = 0;
if (flags & MLX5E_TC_INGRESS)
__flow_flags |= MLX5E_TC_FLOW_INGRESS;
@@ -2884,34 +3020,15 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
return &priv->fs.tc.ht;
}
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags)
+static int
+mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
+ struct tc_cls_flower_offload *f, u16 flow_flags,
+ struct mlx5e_tc_flow_parse_attr **__parse_attr,
+ struct mlx5e_tc_flow **__flow)
{
- struct netlink_ext_ack *extack = f->common.extack;
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct rhashtable *tc_ht = get_tc_ht(priv);
struct mlx5e_tc_flow *flow;
- int attr_size, err = 0;
- u8 flow_flags = 0;
-
- get_flags(flags, &flow_flags);
-
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
- if (flow) {
- NL_SET_ERR_MSG_MOD(extack,
- "flow cookie already exists, ignoring");
- netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
- return 0;
- }
-
- if (esw && esw->mode == SRIOV_OFFLOADS) {
- flow_flags |= MLX5E_TC_FLOW_ESWITCH;
- attr_size = sizeof(struct mlx5_esw_flow_attr);
- } else {
- flow_flags |= MLX5E_TC_FLOW_NIC;
- attr_size = sizeof(struct mlx5_nic_flow_attr);
- }
+ int err;
flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
@@ -2925,49 +3042,161 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
flow->priv = priv;
err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
- if (err < 0)
+ if (err)
goto err_free;
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
- err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow,
- extack);
- if (err < 0)
- goto err_free;
- flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow,
- extack);
- } else {
- err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow,
- extack);
- if (err < 0)
- goto err_free;
- flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow,
- extack);
- }
+ *__flow = flow;
+ *__parse_attr = parse_attr;
- if (IS_ERR(flow->rule[0])) {
- err = PTR_ERR(flow->rule[0]);
- if (err != -EAGAIN)
- goto err_free;
- }
+ return 0;
+
+err_free:
+ kfree(flow);
+ kvfree(parse_attr);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err;
- if (err != -EAGAIN)
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+ &parse_attr, &flow);
+ if (err)
+ goto out;
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
- !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
+ flow->esw_attr->chain = f->common.chain_index;
+ flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+ err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ if (!(flow->esw_attr->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT))
kvfree(parse_attr);
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
- if (err) {
- mlx5e_tc_del_flow(priv, flow);
- kfree(flow);
- }
+ *__flow = flow;
+ return 0;
+
+err_free:
+ kfree(flow);
+ kvfree(parse_attr);
+out:
return err;
+}
+
+static int
+mlx5e_add_nic_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err;
+
+ /* multi-chain not supported for NIC rules */
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ return -EOPNOTSUPP;
+
+ flow_flags |= MLX5E_TC_FLOW_NIC;
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+ &parse_attr, &flow);
+ if (err)
+ goto out;
+
+ err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ kvfree(parse_attr);
+ *__flow = flow;
+
+ return 0;
err_free:
+ kfree(flow);
kvfree(parse_attr);
+out:
+ return err;
+}
+
+static int
+mlx5e_tc_add_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ int flags,
+ struct mlx5e_tc_flow **flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u16 flow_flags;
+ int err;
+
+ get_flags(flags, &flow_flags);
+
+ if (!tc_can_offload_extack(priv->netdev, f->common.extack))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == SRIOV_OFFLOADS)
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow);
+ else
+ err = mlx5e_add_nic_flow(priv, f, flow_flags, flow);
+
+ return err;
+}
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+ int err = 0;
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (flow) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "flow cookie already exists, ignoring");
+ netdev_warn_once(priv->netdev,
+ "flow cookie %lx already exists, ignoring\n",
+ f->cookie);
+ goto out;
+ }
+
+ err = mlx5e_tc_add_flow(priv, f, flags, &flow);
+ if (err)
+ goto out;
+
+ err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
+ if (err)
+ goto err_free;
+
+ return 0;
+
+err_free:
+ mlx5e_tc_del_flow(priv, flow);
kfree(flow);
+out:
return err;
}
@@ -3018,7 +3247,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
return 0;
- counter = mlx5_flow_rule_counter(flow->rule[0]);
+ counter = mlx5e_tc_get_counter(flow);
if (!counter)
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ae73ea992845..6dacaeba2fbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -290,10 +290,9 @@ dma_unmap_wqe_err:
static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
struct mlx5_wq_cyc *wq,
- u16 pi, u16 frag_pi)
+ u16 pi, u16 nnops)
{
struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
- u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
edge_wi = wi + nnops;
@@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
+ u16 headlen, ihs, contig_wqebbs_room;
u16 ds_cnt, ds_cnt_inl = 0;
- u16 headlen, ihs, frag_pi;
u8 num_wqebbs, opcode;
u32 num_bytes;
int num_dma;
@@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
- if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
- mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
}
@@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 headlen, ihs, pi, frag_pi;
+ u16 headlen, ihs, pi, contig_wqebbs_room;
u16 ds_cnt, ds_cnt_inl = 0;
u8 num_wqebbs, opcode;
u32 num_bytes;
@@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
- if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
}
- mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
+ mlx5i_sq_fetch_wqe(sq, &wqe, pi);
/* fill wqe */
wi = &sq->db.wqe_info[pi];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 48864f4988a4..c1e1a16a9b07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -273,7 +273,7 @@ static void eq_pf_process(struct mlx5_eq *eq)
case MLX5_PFAULT_SUBTYPE_WQE:
/* WQE based event */
pfault->type =
- be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24;
+ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
pfault->token =
be32_to_cpu(pf_eqe->wqe.token);
pfault->wqe.wq_num =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ea7dedc2d5ad..d004957328f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
esw_debug(dev, "Create FDB log_max_size(%d)\n",
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
- root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
if (!root_ns) {
esw_warn(dev, "Failed to get FDB flow namespace\n");
return -EOPNOTSUPP;
@@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
if (counter) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter = counter;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
dst = &drop_ctr_dst;
dest_num++;
}
@@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
if (counter) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter = counter;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
dst = &drop_ctr_dst;
dest_num++;
}
@@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->enabled_vports = 0;
esw->mode = SRIOV_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index dfc642de4e6d..aaafc9f17115 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -59,6 +59,10 @@
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+#define FDB_MAX_CHAIN 3
+#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
+#define FDB_MAX_PRIO 16
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_group *allow_untagged_spoofchk_grp;
@@ -120,6 +124,13 @@ struct mlx5_vport {
u16 enabled_events;
};
+enum offloads_fdb_flags {
+ ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+};
+
+extern const unsigned int ESW_POOLS[4];
+
+#define PRIO_LEVELS 2
struct mlx5_eswitch_fdb {
union {
struct legacy_fdb {
@@ -130,16 +141,24 @@ struct mlx5_eswitch_fdb {
} legacy;
struct offloads_fdb {
- struct mlx5_flow_table *fast_fdb;
- struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
+
+ struct {
+ struct mlx5_flow_table *fdb;
+ u32 num_rules;
+ } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS];
+ /* Protects fdb_prio table */
+ struct mutex fdb_prio_lock;
+
+ int fdb_left[ARRAY_SIZE(ESW_POOLS)];
} offloads;
};
+ u32 flags;
};
struct mlx5_esw_offload {
@@ -181,6 +200,7 @@ struct mlx5_eswitch {
struct mlx5_esw_offload offloads;
int mode;
+ int nvports;
};
void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
@@ -228,6 +248,19 @@ void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr);
+
+bool
+mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw);
+
+u16
+mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw);
+
+u32
+mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport,
@@ -266,6 +299,10 @@ struct mlx5_esw_flow_attr {
u32 encap_id;
u32 mod_hdr_id;
u8 match_level;
+ struct mlx5_fc *counter;
+ u32 chain;
+ u16 prio;
+ u32 dest_chain;
struct mlx5e_tc_flow_parse_attr *parse_attr;
};
@@ -318,6 +355,11 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+
+#define FDB_MAX_CHAIN 1
+#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
+#define FDB_MAX_PRIO 1
+
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a35a2310f871..9eac137790f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -37,33 +37,59 @@
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
+#include "en.h"
+#include "fs_core.h"
enum {
FDB_FAST_PATH = 0,
FDB_SLOW_PATH
};
+#define fdb_prio_table(esw, chain, prio, level) \
+ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
+
+static struct mlx5_flow_table *
+esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
+static void
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
+
+bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw)
+{
+ return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED));
+}
+
+u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
+ return FDB_MAX_CHAIN;
+
+ return 0;
+}
+
+u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
+ return FDB_MAX_PRIO;
+
+ return 1;
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_esw_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_table *ft = NULL;
- struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ bool mirror = !!(attr->mirror_count);
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_table *fdb;
int j, i = 0;
void *misc;
if (esw->mode != SRIOV_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
- if (attr->mirror_count)
- ft = esw->fdb_table.offloads.fwd_fdb;
- else
- ft = esw->fdb_table.offloads.fast_fdb;
-
flow_act.action = attr->action;
/* if per flow vlan pop/push is emulated, don't set that into the firmware */
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
@@ -81,23 +107,33 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
- for (j = attr->mirror_count; j < attr->out_count; j++) {
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[j]->vport;
- dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
- dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ if (attr->dest_chain) {
+ struct mlx5_flow_table *ft;
+
+ ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0);
+ if (IS_ERR(ft)) {
+ rule = ERR_CAST(ft);
+ goto err_create_goto_table;
+ }
+
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = ft;
i++;
+ } else {
+ for (j = attr->mirror_count; j < attr->out_count; j++) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.vhca_id =
+ MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
+ dest[i].vport.vhca_id_valid =
+ !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ i++;
+ }
}
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw->dev, true);
- if (IS_ERR(counter)) {
- rule = ERR_CAST(counter);
- goto err_counter_alloc;
- }
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[i].counter = counter;
+ dest[i].counter_id = mlx5_fc_id(attr->counter);
i++;
}
@@ -127,10 +163,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
- flow_act.encap_id = attr->encap_id;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ flow_act.reformat_id = attr->encap_id;
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
+ fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ if (IS_ERR(fdb)) {
+ rule = ERR_CAST(fdb);
+ goto err_esw_get;
+ }
+
+ rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
if (IS_ERR(rule))
goto err_add_rule;
else
@@ -139,8 +181,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
return rule;
err_add_rule:
- mlx5_fc_destroy(esw->dev, counter);
-err_counter_alloc:
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+err_esw_get:
+ if (attr->dest_chain)
+ esw_put_prio_table(esw, attr->dest_chain, 1, 0);
+err_create_goto_table:
return rule;
}
@@ -150,11 +195,25 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
- struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_flow_table *fast_fdb;
+ struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
void *misc;
int i;
+ fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
+ if (IS_ERR(fast_fdb)) {
+ rule = ERR_CAST(fast_fdb);
+ goto err_get_fast;
+ }
+
+ fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1);
+ if (IS_ERR(fwd_fdb)) {
+ rule = ERR_CAST(fwd_fdb);
+ goto err_get_fwd;
+ }
+
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
for (i = 0; i < attr->mirror_count; i++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
@@ -164,7 +223,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = esw->fdb_table.offloads.fwd_fdb,
+ dest[i].ft = fwd_fdb,
i++;
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
@@ -187,12 +246,41 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
MLX5_MATCH_MISC_PARAMETERS;
- rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i);
+ rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
- if (!IS_ERR(rule))
- esw->offloads.num_flows++;
+ if (IS_ERR(rule))
+ goto add_err;
+
+ esw->offloads.num_flows++;
return rule;
+add_err:
+ esw_put_prio_table(esw, attr->chain, attr->prio, 1);
+err_get_fwd:
+ esw_put_prio_table(esw, attr->chain, attr->prio, 0);
+err_get_fast:
+ return rule;
+}
+
+static void
+__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr,
+ bool fwd_rule)
+{
+ bool mirror = (attr->mirror_count > 0);
+
+ mlx5_del_flow_rules(rule);
+ esw->offloads.num_flows--;
+
+ if (fwd_rule) {
+ esw_put_prio_table(esw, attr->chain, attr->prio, 1);
+ esw_put_prio_table(esw, attr->chain, attr->prio, 0);
+ } else {
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ if (attr->dest_chain)
+ esw_put_prio_table(esw, attr->dest_chain, 1, 0);
+ }
}
void
@@ -200,12 +288,15 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *attr)
{
- struct mlx5_fc *counter = NULL;
+ __mlx5_eswitch_del_rule(esw, rule, attr, false);
+}
- counter = mlx5_flow_rule_counter(rule);
- mlx5_del_flow_rules(rule);
- mlx5_fc_destroy(esw->dev, counter);
- esw->offloads.num_flows--;
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr)
+{
+ __mlx5_eswitch_del_rule(esw, rule, attr, true);
}
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
@@ -294,7 +385,8 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
- fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ !attr->dest_chain);
err = esw_add_vlan_action_check(attr, push, pop, fwd);
if (err)
@@ -501,74 +593,170 @@ out:
#define ESW_OFFLOADS_NUM_GROUPS 4
-static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS),
+ * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated
+ * for each flow table pool. We can allocate up to 16M of each pool,
+ * and we keep track of how much we used via put/get_sz_to_pool.
+ * Firmware doesn't report any of this for now.
+ * ESW_POOL is expected to be sorted from large to small
+ */
+#define ESW_SIZE (16 * 1024 * 1024)
+const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
+ 64 * 1024, 4 * 1024 };
+
+static int
+get_sz_from_pool(struct mlx5_eswitch *esw)
+{
+ int sz = 0, i;
+
+ for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
+ if (esw->fdb_table.offloads.fdb_left[i]) {
+ --esw->fdb_table.offloads.fdb_left[i];
+ sz = ESW_POOLS[i];
+ break;
+ }
+ }
+
+ return sz;
+}
+
+static void
+put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
+ if (sz >= ESW_POOLS[i]) {
+ ++esw->fdb_table.offloads.fdb_left[i];
+ break;
+ }
+ }
+}
+
+static struct mlx5_flow_table *
+create_next_size_table(struct mlx5_eswitch *esw,
+ struct mlx5_flow_namespace *ns,
+ u16 table_prio,
+ int level,
+ u32 flags)
+{
+ struct mlx5_flow_table *fdb;
+ int sz;
+
+ sz = get_sz_from_pool(esw);
+ if (!sz)
+ return ERR_PTR(-ENOSPC);
+
+ fdb = mlx5_create_auto_grouped_flow_table(ns,
+ table_prio,
+ sz,
+ ESW_OFFLOADS_NUM_GROUPS,
+ level,
+ flags);
+ if (IS_ERR(fdb)) {
+ esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n",
+ (int)PTR_ERR(fdb), table_prio, level, sz);
+ put_sz_to_pool(esw, sz);
+ }
+
+ return fdb;
+}
+
+static struct mlx5_flow_table *
+esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
{
struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
- int esw_size, err = 0;
+ struct mlx5_flow_namespace *ns;
+ int table_prio, l = 0;
u32 flags = 0;
- u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
- root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
- if (!root_ns) {
- esw_warn(dev, "Failed to get FDB flow namespace\n");
- err = -EOPNOTSUPP;
- goto out_namespace;
- }
+ if (chain == FDB_SLOW_PATH_CHAIN)
+ return esw->fdb_table.offloads.slow_fdb;
- esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
- max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
+ mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
- esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
- 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+ fdb = fdb_prio_table(esw, chain, prio, level).fdb;
+ if (fdb) {
+ /* take ref on earlier levels as well */
+ while (level >= 0)
+ fdb_prio_table(esw, chain, prio, level--).num_rules++;
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ return fdb;
+ }
- if (mlx5_esw_has_fwd_fdb(dev))
- esw_size >>= 1;
+ ns = mlx5_get_fdb_sub_ns(dev, chain);
+ if (!ns) {
+ esw_warn(dev, "Failed to get FDB sub namespace\n");
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+ flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
- esw_size,
- ESW_OFFLOADS_NUM_GROUPS, 0,
- flags);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
- goto out_namespace;
- }
- esw->fdb_table.offloads.fast_fdb = fdb;
+ table_prio = (chain * FDB_MAX_PRIO) + prio - 1;
- if (!mlx5_esw_has_fwd_fdb(dev))
- goto out_namespace;
+ /* create earlier levels for correct fs_core lookup when
+ * connecting tables
+ */
+ for (l = 0; l <= level; l++) {
+ if (fdb_prio_table(esw, chain, prio, l).fdb) {
+ fdb_prio_table(esw, chain, prio, l).num_rules++;
+ continue;
+ }
- fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
- esw_size,
- ESW_OFFLOADS_NUM_GROUPS, 1,
- flags);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create fwd table err %d\n", err);
- goto out_ft;
+ fdb = create_next_size_table(esw, ns, table_prio, l, flags);
+ if (IS_ERR(fdb)) {
+ l--;
+ goto err_create_fdb;
+ }
+
+ fdb_prio_table(esw, chain, prio, l).fdb = fdb;
+ fdb_prio_table(esw, chain, prio, l).num_rules = 1;
}
- esw->fdb_table.offloads.fwd_fdb = fdb;
- return err;
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ return fdb;
-out_ft:
- mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
-out_namespace:
- return err;
+err_create_fdb:
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ if (l >= 0)
+ esw_put_prio_table(esw, chain, prio, l);
+
+ return fdb;
+}
+
+static void
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
+{
+ int l;
+
+ if (chain == FDB_SLOW_PATH_CHAIN)
+ return;
+
+ mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
+
+ for (l = level; l >= 0; l--) {
+ if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0)
+ continue;
+
+ put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte);
+ mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb);
+ fdb_prio_table(esw, chain, prio, l).fdb = NULL;
+ }
+
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
}
-static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
{
- if (mlx5_esw_has_fwd_fdb(esw->dev))
- mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb);
- mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+ /* If lazy creation isn't supported, deref the fast path tables */
+ if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) {
+ esw_put_prio_table(esw, 0, 1, 1);
+ esw_put_prio_table(esw, 0, 1, 0);
+ }
}
#define MAX_PF_SQ 256
@@ -579,12 +767,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
+ u32 *flow_group_in, max_flow_counter;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
- int table_size, ix, err = 0;
+ int table_size, ix, err = 0, i;
struct mlx5_flow_group *g;
+ u32 flags = 0, fdb_max;
void *match_criteria;
- u32 *flow_group_in;
u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
@@ -599,12 +788,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
goto ns_err;
}
- err = esw_create_offloads_fast_fdb_table(esw);
- if (err)
- goto fast_fdb_err;
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+ fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
+
+ esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
+ max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
+ fdb_max);
+
+ for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++)
+ esw->fdb_table.offloads.fdb_left[i] =
+ ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+ /* create the slow path fdb with encap set, so further table instances
+ * can be created at run time while VFs are probed if the FW allows that.
+ */
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ ft_attr.flags = flags;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@@ -616,6 +822,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
esw->fdb_table.offloads.slow_fdb = fdb;
+ /* If lazy creation isn't supported, open the fast path tables now */
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n");
+ esw_get_prio_table(esw, 0, 1, 0);
+ esw_get_prio_table(esw, 0, 1, 1);
+ } else {
+ esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n");
+ esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
+ }
+
/* create send-to-vport group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -663,6 +881,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto miss_rule_err;
+ esw->nvports = nvports;
kvfree(flow_group_in);
return 0;
@@ -671,10 +890,9 @@ miss_rule_err:
miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
+ esw_destroy_offloads_fast_fdb_tables(esw);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
slow_fdb_err:
- esw_destroy_offloads_fast_fdb_table(esw);
-fast_fdb_err:
ns_err:
kvfree(flow_group_in);
return err;
@@ -682,7 +900,7 @@ ns_err:
static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
{
- if (!esw->fdb_table.offloads.fast_fdb)
+ if (!esw->fdb_table.offloads.slow_fdb)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
@@ -692,7 +910,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
- esw_destroy_offloads_fast_fdb_table(esw);
+ esw_destroy_offloads_fast_fdb_tables(esw);
}
static int esw_create_offloads_table(struct mlx5_eswitch *esw)
@@ -949,6 +1167,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
+ mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
+
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
return err;
@@ -1256,7 +1476,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
return err;
if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
- (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+ (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
return -EOPNOTSUPP;
@@ -1277,16 +1497,19 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
return -EOPNOTSUPP;
}
- esw_destroy_offloads_fast_fdb_table(esw);
+ esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
- err = esw_create_offloads_fast_fdb_table(esw);
+
+ err = esw_create_offloads_fdb_tables(esw, esw->nvports);
+
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed re-creating fast FDB table");
esw->offloads.encap = !encap;
- (void)esw_create_offloads_fast_fdb_table(esw);
+ (void)esw_create_offloads_fdb_tables(esw, esw->nvports);
}
+
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 5645a4facad2..515e3d6de051 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -245,7 +245,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
return ERR_PTR(res);
}
- /* Context will be freed by wait func after completion */
+ /* Context should be freed by the caller after completion. */
return context;
}
@@ -418,10 +418,8 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
cmd.flags = htonl(flags);
context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
- if (IS_ERR(context)) {
- err = PTR_ERR(context);
- goto out;
- }
+ if (IS_ERR(context))
+ return PTR_ERR(context);
err = mlx5_fpga_ipsec_cmd_wait(context);
if (err)
@@ -435,6 +433,7 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
}
out:
+ kfree(context);
return err;
}
@@ -650,7 +649,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
(match_criteria_enable &
~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
(flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- flow_act->has_flow_tag)
+ (flow_act->flags & FLOW_ACT_HAS_TAG))
return false;
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 8e01f818021b..08a891f9aade 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *next_ft,
unsigned int *table_id, u32 flags)
{
- int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
+ int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
+ int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
int err;
@@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
}
MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
- en_encap_decap);
- MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
- en_encap_decap);
+ en_decap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
+ en_encap);
switch (op_mod) {
case FS_FT_OP_MOD_NORMAL:
@@ -343,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
- MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.reformat_id);
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_id);
@@ -417,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
continue;
MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
- dst->dest_attr.counter->id);
+ dst->dest_attr.counter_id);
in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
list_size++;
}
@@ -594,62 +596,78 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
*bytes = MLX5_GET64(traffic_counter, stats, octets);
}
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
- int header_type,
- size_t size,
- void *encap_header,
- u32 *encap_id)
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ u32 *packet_reformat_id)
{
- int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
- u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
- void *encap_header_in;
- void *header;
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+ void *packet_reformat_context_in;
+ int max_encap_size;
+ void *reformat;
int inlen;
int err;
u32 *in;
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB)
+ max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
+ else
+ max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
+
if (size > max_encap_size) {
mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
size, max_encap_size);
return -EINVAL;
}
- in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size,
+ in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size,
GFP_KERNEL);
if (!in)
return -ENOMEM;
- encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
- header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
- inlen = header - (void *)in + size;
+ packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in,
+ in, packet_reformat_context);
+ reformat = MLX5_ADDR_OF(packet_reformat_context_in,
+ packet_reformat_context_in,
+ reformat_data);
+ inlen = reformat - (void *)in + size;
memset(in, 0, inlen);
- MLX5_SET(alloc_encap_header_in, in, opcode,
- MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
- MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
- MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
- memcpy(header, encap_header, size);
+ MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_data_size, size);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_type, reformat_type);
+ memcpy(reformat, reformat_data, size);
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+ *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out,
+ out, packet_reformat_id);
kfree(in);
return err;
}
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+ u32 packet_reformat_id)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
- u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
memset(in, 0, sizeof(in));
- MLX5_SET(dealloc_encap_header_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
- MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+ packet_reformat_id);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
u8 namespace, u8 num_actions,
@@ -667,9 +685,14 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
table_type = FS_FT_FDB;
break;
case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_BYPASS:
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_RX;
break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
+ table_type = FS_FT_NIC_TX;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -702,6 +725,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
kfree(in);
return err;
}
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
{
@@ -716,6 +740,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.create_flow_table = mlx5_cmd_create_flow_table,
@@ -760,8 +785,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
case FS_FT_FDB:
case FS_FT_SNIFFER_RX:
case FS_FT_SNIFFER_TX:
- return mlx5_fs_cmd_get_fw_cmds();
case FS_FT_NIC_TX:
+ return mlx5_fs_cmd_get_fw_cmds();
default:
return mlx5_fs_cmd_get_stub_cmds();
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 37d114c668b7..67ba4c975d81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -40,6 +40,7 @@
#include "diag/fs_tracepoint.h"
#include "accel/ipsec.h"
#include "fpga/ipsec.h"
+#include "eswitch.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -76,6 +77,14 @@
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
+#define FS_CHAINING_CAPS_EGRESS \
+ FS_REQUIRED_CAPS( \
+ FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_transmit.modify_root), \
+ FS_CAP(flow_table_properties_nic_transmit \
+ .identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
+
#define LEFTOVERS_NUM_LEVELS 1
#define LEFTOVERS_NUM_PRIOS 1
@@ -151,6 +160,17 @@ static struct init_tree_node {
}
};
+static struct init_tree_node egress_root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 1,
+ .children = (struct init_tree_node[]) {
+ ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ }
+};
+
enum fs_i_lock_class {
FS_LOCK_GRANDPARENT,
FS_LOCK_PARENT,
@@ -694,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
struct fs_node *iter = list_entry(start, struct fs_node, list);
struct mlx5_flow_table *ft = NULL;
- if (!root)
+ if (!root || root->type == FS_TYPE_PRIO_CHAINS)
return NULL;
list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -1388,7 +1408,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
return false;
if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
MLX5_FLOW_CONTEXT_ACTION_DECAP |
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
@@ -1408,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
return -EEXIST;
}
- if (flow_act->has_flow_tag &&
+ if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
fte->action.flow_tag != flow_act->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
@@ -1455,29 +1475,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
return handle;
}
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
+static bool counter_is_valid(u32 action)
{
- struct mlx5_flow_rule *dst;
- struct fs_fte *fte;
-
- fs_get_obj(fte, handle->rule[0]->node.parent);
-
- fs_for_each_dst(dst, fte) {
- if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
- return dst->dest_attr.counter;
- }
-
- return NULL;
-}
-
-static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
-{
- if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
- return !counter;
-
- if (!counter)
- return false;
-
return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}
@@ -1487,7 +1486,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
struct mlx5_flow_table *ft)
{
if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
- return counter_is_valid(dest->counter, action);
+ return counter_is_valid(action);
if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return true;
@@ -1629,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
search_again_locked:
version = matched_fgs_get_version(match_head);
+ if (flow_act->flags & FLOW_ACT_NO_APPEND)
+ goto skip_search;
/* Try to find a fg that already contains a matching fte */
list_for_each_entry(iter, match_head, list) {
struct fs_fte *fte_tmp;
@@ -1645,6 +1646,11 @@ search_again_locked:
return rule;
}
+skip_search:
+ /* No group with matching fte found, or we skipped the search.
+ * Try to add a new fte to any matching fg.
+ */
+
/* Check the ft version, for case that new flow group
* was added while the fgs weren't locked
*/
@@ -1975,12 +1981,24 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
fg->id);
}
+struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
+ int n)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ if (!steering || !steering->fdb_sub_ns)
+ return NULL;
+
+ return steering->fdb_sub_ns[n];
+}
+EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+
struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
struct mlx5_flow_root_namespace *root_ns;
- int prio;
+ int prio = 0;
struct fs_prio *fs_prio;
struct mlx5_flow_namespace *ns;
@@ -1988,40 +2006,29 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return NULL;
switch (type) {
- case MLX5_FLOW_NAMESPACE_BYPASS:
- case MLX5_FLOW_NAMESPACE_LAG:
- case MLX5_FLOW_NAMESPACE_OFFLOADS:
- case MLX5_FLOW_NAMESPACE_ETHTOOL:
- case MLX5_FLOW_NAMESPACE_KERNEL:
- case MLX5_FLOW_NAMESPACE_LEFTOVERS:
- case MLX5_FLOW_NAMESPACE_ANCHOR:
- prio = type;
- break;
case MLX5_FLOW_NAMESPACE_FDB:
if (steering->fdb_root_ns)
return &steering->fdb_root_ns->ns;
- else
- return NULL;
+ return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
if (steering->sniffer_rx_root_ns)
return &steering->sniffer_rx_root_ns->ns;
- else
- return NULL;
+ return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
if (steering->sniffer_tx_root_ns)
return &steering->sniffer_tx_root_ns->ns;
- else
- return NULL;
- case MLX5_FLOW_NAMESPACE_EGRESS:
- if (steering->egress_root_ns)
- return &steering->egress_root_ns->ns;
- else
- return NULL;
- default:
return NULL;
+ default:
+ break;
+ }
+
+ if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ root_ns = steering->egress_root_ns;
+ } else { /* Must be NIC RX */
+ root_ns = steering->root_ns;
+ prio = type;
}
- root_ns = steering->root_ns;
if (!root_ns)
return NULL;
@@ -2064,8 +2071,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
}
}
-static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
- unsigned int prio, int num_levels)
+static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio,
+ int num_levels,
+ enum fs_node_type type)
{
struct fs_prio *fs_prio;
@@ -2073,7 +2082,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
if (!fs_prio)
return ERR_PTR(-ENOMEM);
- fs_prio->node.type = FS_TYPE_PRIO;
+ fs_prio->node.type = type;
tree_init_node(&fs_prio->node, NULL, del_sw_prio);
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->num_levels = num_levels;
@@ -2083,6 +2092,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
return fs_prio;
}
+static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
+ unsigned int prio,
+ int num_levels)
+{
+ return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio, int num_levels)
+{
+ return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
+}
+
static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
*ns)
{
@@ -2387,6 +2409,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
cleanup_egress_acls_root_ns(dev);
cleanup_ingress_acls_root_ns(dev);
cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
cleanup_root_ns(steering->egress_root_ns);
@@ -2432,27 +2457,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
- struct fs_prio *prio;
+ struct mlx5_flow_namespace *ns;
+ struct fs_prio *maj_prio;
+ struct fs_prio *min_prio;
+ int levels;
+ int chain;
+ int prio;
+ int err;
steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
if (!steering->fdb_root_ns)
return -ENOMEM;
- prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
- if (IS_ERR(prio))
+ steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) *
+ FDB_MAX_CHAIN + 1, GFP_KERNEL);
+ if (!steering->fdb_sub_ns)
+ return -ENOMEM;
+
+ levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
+ maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0,
+ levels);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
goto out_err;
+ }
+
+ for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
+ ns = fs_create_namespace(maj_prio);
+ if (IS_ERR(ns)) {
+ err = PTR_ERR(ns);
+ goto out_err;
+ }
+
+ for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
+ min_prio = fs_create_prio(ns, prio, 2);
+ if (IS_ERR(min_prio)) {
+ err = PTR_ERR(min_prio);
+ goto out_err;
+ }
+ }
+
+ steering->fdb_sub_ns[chain] = ns;
+ }
- prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
- if (IS_ERR(prio))
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
goto out_err;
+ }
set_prio_attrs(steering->fdb_root_ns);
return 0;
out_err:
cleanup_root_ns(steering->fdb_root_ns);
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
steering->fdb_root_ns = NULL;
- return PTR_ERR(prio);
+ return err;
}
static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
@@ -2537,16 +2599,23 @@ cleanup_root_ns:
static int init_egress_root_ns(struct mlx5_flow_steering *steering)
{
- struct fs_prio *prio;
+ int err;
steering->egress_root_ns = create_root_ns(steering,
FS_FT_NIC_TX);
if (!steering->egress_root_ns)
return -ENOMEM;
- /* create 1 prio*/
- prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
- return PTR_ERR_OR_ZERO(prio);
+ err = init_root_tree(steering, &egress_root_fs,
+ &steering->egress_root_ns->ns.node);
+ if (err)
+ goto cleanup;
+ set_prio_attrs(steering->egress_root_ns);
+ return 0;
+cleanup:
+ cleanup_root_ns(steering->egress_root_ns);
+ steering->egress_root_ns = NULL;
+ return err;
}
int mlx5_init_fs(struct mlx5_core_dev *dev)
@@ -2614,7 +2683,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
- if (MLX5_IPSEC_DEV(dev)) {
+ if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
err = init_egress_root_ns(steering);
if (err)
goto err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index a06f83c0c2b6..b51ad217da32 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -38,9 +38,21 @@
#include <linux/rhashtable.h>
#include <linux/llist.h>
+/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only,
+ * and those are in parallel to one another when going over them to connect
+ * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one
+ * parallel namespace will not automatically connect to the first flow table
+ * found in any prio in any next namespace, but skip the entire containing
+ * TYPE_PRIO_CHAINS prio.
+ *
+ * This is used to implement tc chains, each chain of prios is a different
+ * namespace inside a containing TYPE_PRIO_CHAINS prio.
+ */
+
enum fs_node_type {
FS_TYPE_NAMESPACE,
FS_TYPE_PRIO,
+ FS_TYPE_PRIO_CHAINS,
FS_TYPE_FLOW_TABLE,
FS_TYPE_FLOW_GROUP,
FS_TYPE_FLOW_ENTRY,
@@ -73,6 +85,7 @@ struct mlx5_flow_steering {
struct kmem_cache *ftes_cache;
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_root_namespace *fdb_root_ns;
+ struct mlx5_flow_namespace **fdb_sub_ns;
struct mlx5_flow_root_namespace **esw_egress_root_ns;
struct mlx5_flow_root_namespace **esw_ingress_root_ns;
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 09206c4acd9a..32accd6b041b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -99,6 +99,18 @@ static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
list_add_tail(&counter->list, next);
}
+static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ list_del(&counter->list);
+
+ spin_lock(&fc_stats->counters_idr_lock);
+ WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+ spin_unlock(&fc_stats->counters_idr_lock);
+}
+
/* The function returns the last counter that was queried so the caller
* function can continue calling it till all counters are queried.
*/
@@ -179,20 +191,23 @@ static void mlx5_fc_stats_work(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
priv.fc_stats.work.work);
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- struct llist_node *tmplist = llist_del_all(&fc_stats->addlist);
+ /* Take dellist first to ensure that counters cannot be deleted before
+ * they are inserted.
+ */
+ struct llist_node *dellist = llist_del_all(&fc_stats->dellist);
+ struct llist_node *addlist = llist_del_all(&fc_stats->addlist);
struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
unsigned long now = jiffies;
- if (tmplist || !list_empty(&fc_stats->counters))
+ if (addlist || !list_empty(&fc_stats->counters))
queue_delayed_work(fc_stats->wq, &fc_stats->work,
fc_stats->sampling_interval);
- llist_for_each_entry(counter, tmplist, addlist)
+ llist_for_each_entry(counter, addlist, addlist)
mlx5_fc_stats_insert(dev, counter);
- tmplist = llist_del_all(&fc_stats->dellist);
- llist_for_each_entry_safe(counter, tmp, tmplist, dellist) {
- list_del(&counter->list);
+ llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
+ mlx5_fc_stats_remove(dev, counter);
mlx5_free_fc(dev, counter);
}
@@ -258,6 +273,12 @@ err_out:
}
EXPORT_SYMBOL(mlx5_fc_create);
+u32 mlx5_fc_id(struct mlx5_fc *counter)
+{
+ return counter->id;
+}
+EXPORT_SYMBOL(mlx5_fc_id);
+
void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
@@ -266,10 +287,6 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
return;
if (counter->aging) {
- spin_lock(&fc_stats->counters_idr_lock);
- WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
- spin_unlock(&fc_stats->counters_idr_lock);
-
llist_add(&counter->dellist, &fc_stats->dellist);
mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 5ef3ef0072b4..9165ca567047 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -110,12 +110,11 @@ struct mlx5i_tx_wqe {
static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
struct mlx5i_tx_wqe **wqe,
- u16 *pi)
+ u16 pi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memset(*wqe, 0, sizeof(**wqe));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index cc298527baf1..0594d0961cb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -39,6 +39,7 @@
#include <linux/if_link.h>
#include <linux/firmware.h>
#include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
@@ -171,17 +172,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
- int header_type,
- size_t size,
- void *encap_header,
- u32 *encap_id);
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
-
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
- u8 namespace, u8 num_actions,
- void *modify_actions, u32 *modify_header_id);
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 4ca07bfb6b14..91b8139a388d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
}
qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ qp->uid = MLX5_GET(create_dct_in, in, uid);
err = create_resource_common(dev, qp, MLX5_RES_DCT);
if (err)
goto err_cmd;
@@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
err_cmd:
MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, din, uid, qp->uid);
mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
(void *)&out, sizeof(dout));
return err;
@@ -240,6 +242,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
if (err)
return err;
+ qp->uid = MLX5_GET(create_qp_in, in, uid);
qp->qpn = MLX5_GET(create_qp_out, out, qpn);
mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
@@ -261,6 +264,7 @@ err_cmd:
memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, din, uid, qp->uid);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}
@@ -275,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(drain_dct_in, in, uid, qp->uid);
return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
(void *)&out, sizeof(out));
}
@@ -301,6 +306,7 @@ destroy:
destroy_resource_common(dev, &dct->mqp);
MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, in, uid, qp->uid);
err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
(void *)&out, sizeof(out));
return err;
@@ -320,6 +326,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, in, uid, qp->uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
@@ -373,7 +380,7 @@ static void mbox_free(struct mbox_info *mbox)
static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
u32 opt_param_mask, void *qpc,
- struct mbox_info *mbox)
+ struct mbox_info *mbox, u16 uid)
{
mbox->out = NULL;
mbox->in = NULL;
@@ -381,26 +388,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
#define MBOX_ALLOC(mbox, typ) \
mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
-#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
- MLX5_SET(typ##_in, in, opcode, _opcode); \
- MLX5_SET(typ##_in, in, qpn, _qpn)
-
-#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
- MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
- MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
- memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \
+ do { \
+ MLX5_SET(typ##_in, in, opcode, _opcode); \
+ MLX5_SET(typ##_in, in, qpn, _qpn); \
+ MLX5_SET(typ##_in, in, uid, _uid); \
+ } while (0)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \
+ do { \
+ MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \
+ MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+ memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \
+ MLX5_ST_SZ_BYTES(qpc)); \
+ } while (0)
switch (opcode) {
/* 2RST & 2ERR */
case MLX5_CMD_OP_2RST_QP:
if (MBOX_ALLOC(mbox, qp_2rst))
return -ENOMEM;
- MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+ MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
break;
case MLX5_CMD_OP_2ERR_QP:
if (MBOX_ALLOC(mbox, qp_2err))
return -ENOMEM;
- MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+ MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
break;
/* MODIFY with QPC */
@@ -408,37 +421,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
if (MBOX_ALLOC(mbox, rst2init_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_INIT2RTR_QP:
if (MBOX_ALLOC(mbox, init2rtr_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_RTR2RTS_QP:
if (MBOX_ALLOC(mbox, rtr2rts_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_RTS2RTS_QP:
if (MBOX_ALLOC(mbox, rts2rts_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_SQERR2RTS_QP:
if (MBOX_ALLOC(mbox, sqerr2rts_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_INIT2INIT_QP:
if (MBOX_ALLOC(mbox, init2init_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
default:
mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
@@ -456,7 +469,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
int err;
err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
- opt_param_mask, qpc, &mbox);
+ opt_param_mask, qpc, &mbox, qp->uid);
if (err)
return err;
@@ -531,6 +544,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
}
EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ MLX5_SET(destroy_rq_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq)
{
@@ -541,6 +565,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
if (err)
return err;
+ rq->uid = MLX5_GET(create_rq_in, in, uid);
rq->qpn = rqn;
err = create_resource_common(dev, rq, MLX5_RES_RQ);
if (err)
@@ -549,7 +574,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
return 0;
err_destroy_rq:
- mlx5_core_destroy_rq(dev, rq->qpn);
+ destroy_rq_tracked(dev, rq->qpn, rq->uid);
return err;
}
@@ -559,10 +584,21 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
struct mlx5_core_qp *rq)
{
destroy_resource_common(dev, rq);
- mlx5_core_destroy_rq(dev, rq->qpn);
+ destroy_rq_tracked(dev, rq->qpn, rq->uid);
}
EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ MLX5_SET(destroy_sq_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *sq)
{
@@ -573,6 +609,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
if (err)
return err;
+ sq->uid = MLX5_GET(create_sq_in, in, uid);
sq->qpn = sqn;
err = create_resource_common(dev, sq, MLX5_RES_SQ);
if (err)
@@ -581,7 +618,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
return 0;
err_destroy_sq:
- mlx5_core_destroy_sq(dev, sq->qpn);
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
return err;
}
@@ -591,7 +628,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
struct mlx5_core_qp *sq)
{
destroy_resource_common(dev, sq);
- mlx5_core_destroy_sq(dev, sq->qpn);
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
}
EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index 7e20666ce5ae..6a6fc9be01e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
if (!create_in)
return -ENOMEM;
+ MLX5_SET(create_srq_in, create_in, uid, in->uid);
srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
@@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
- if (!err)
+ if (!err) {
srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+ srq->uid = in->uid;
+ }
return err;
}
@@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_srq_in, srq_in, opcode,
MLX5_CMD_OP_DESTROY_SRQ);
MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+ MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
@@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
+ MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
@@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
if (!create_in)
return -ENOMEM;
+ MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
xrc_srq_context_entry);
pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
@@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
goto out;
srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+ srq->uid = in->uid;
out:
kvfree(create_in);
return err;
@@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
MLX5_CMD_OP_DESTROY_XRC_SRQ);
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
@@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
@@ -365,10 +374,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(create_rmp_in, create_in, uid, in->uid);
set_wq(wq, in);
memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
+ if (!err)
+ srq->uid = in->uid;
kvfree(create_in);
return err;
@@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
{
- return mlx5_core_destroy_rmp(dev, srq->srqn);
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
+
+ MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
static int arm_rmp_cmd(struct mlx5_core_dev *dev,
@@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(modify_rmp_in, in, uid, srq->uid);
MLX5_SET(wq, wq, lwm, lwm);
MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
@@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(xrqc, xrqc, user_index, in->user_index);
MLX5_SET(xrqc, xrqc, cqn, in->cqn);
MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+ MLX5_SET(create_xrq_in, create_in, uid, in->uid);
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
- if (!err)
+ if (!err) {
srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+ srq->uid = in->uid;
+ }
return err;
}
@@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+ MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
MLX5_SET(arm_rq_in, in, lwm, lwm);
+ MLX5_SET(arm_rq_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 68e7f8df2a6d..2dcbf1ebfd6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
return (u32)wq->fbc.sz_m1 + 1;
}
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
-{
- return wq->fbc.frag_sz_m1 + 1;
-}
-
u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
{
return wq->fbc.sz_m1 + 1;
@@ -54,54 +49,37 @@ u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
return (u32)wq->fbc.sz_m1 + 1;
}
-static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
-{
- return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
-{
- return mlx5_wq_cyc_get_byte_size(&wq->rq) +
- mlx5_wq_cyc_get_byte_size(&wq->sq);
-}
-
-static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
+static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
{
- return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
-{
- return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
+ return ((u32)1 << log_sz) << log_stride;
}
int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
+ u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+ u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
int err;
- mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
- MLX5_GET(wq, wqc, log_wq_sz),
- fbc);
- wq->sz = wq->fbc.sz_m1 + 1;
-
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
&wq_ctrl->buf, param->buf_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
goto err_db_free;
}
- fbc->frag_buf = wq_ctrl->buf;
- wq->db = wq_ctrl->db.db;
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
+ wq->sz = mlx5_wq_cyc_get_size(wq);
wq_ctrl->mdev = mdev;
@@ -113,46 +91,19 @@ err_db_free:
return err;
}
-static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
- struct mlx5_wq_qp *qp)
-{
- struct mlx5_frag_buf_ctrl *sq_fbc;
- struct mlx5_frag_buf *rqb, *sqb;
-
- rqb = &qp->rq.fbc.frag_buf;
- *rqb = *buf;
- rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq);
- rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
-
- sq_fbc = &qp->sq.fbc;
- sqb = &sq_fbc->frag_buf;
- *sqb = *buf;
- sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq);
- sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
- sqb->frags += rqb->npages; /* first part is for the rq */
- if (sq_fbc->strides_offset)
- sqb->frags--;
-}
-
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
- u16 sq_strides_offset;
- u32 rq_pg_remainder;
- int err;
+ u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+ u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size);
+ u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB);
+ u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size);
- mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
- MLX5_GET(qpc, qpc, log_rq_size),
- &wq->rq.fbc);
+ u32 rq_byte_size;
+ int err;
- rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
- sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
- mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
- MLX5_GET(qpc, qpc, log_sq_size),
- sq_strides_offset,
- &wq->sq.fbc);
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
@@ -160,14 +111,32 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+ err = mlx5_frag_buf_alloc_node(mdev,
+ wq_get_byte_sz(log_rq_sz, log_rq_stride) +
+ wq_get_byte_sz(log_sq_sz, log_sq_stride),
&wq_ctrl->buf, param->buf_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
goto err_db_free;
}
- mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc);
+
+ rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride);
+
+ if (rq_byte_size < PAGE_SIZE) {
+ /* SQ starts within the same page of the RQ */
+ u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB;
+
+ mlx5_init_fbc_offset(wq_ctrl->buf.frags,
+ log_sq_stride, log_sq_sz, sq_strides_offset,
+ &wq->sq.fbc);
+ } else {
+ u16 rq_npages = rq_byte_size >> PAGE_SHIFT;
+
+ mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages,
+ log_sq_stride, log_sq_sz, &wq->sq.fbc);
+ }
wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -186,17 +155,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
+ u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+ u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size);
int err;
- mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
-
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
&wq_ctrl->buf,
param->buf_numa_node);
if (err) {
@@ -205,8 +176,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->fbc.frag_buf = wq_ctrl->buf;
- wq->db = wq_ctrl->db.db;
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc);
wq_ctrl->mdev = mdev;
@@ -222,30 +192,29 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_ll *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
+ u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+ u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
struct mlx5_wqe_srq_next_seg *next_seg;
int err;
int i;
- mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
- MLX5_GET(wq, wqc, log_wq_sz),
- fbc);
-
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
&wq_ctrl->buf, param->buf_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
goto err_db_free;
}
- wq->fbc.frag_buf = wq_ctrl->buf;
- wq->db = wq_ctrl->db.db;
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
for (i = 0; i < fbc->sz_m1; i++) {
next_seg = mlx5_wq_ll_get_wqe(wq, i);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 3a1a170bb2d7..b1293d153a58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl);
u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
@@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
return ctr & wq->fbc.sz_m1;
}
-static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
-{
- return ctr & wq->fbc.frag_sz_m1;
-}
-
static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
{
return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
@@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
}
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
{
int equal = (cc1 == cc2);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 68fa44a41485..1f77e97e2d7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_acl_flex_keys.o \
spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
spectrum_mr_tcam.o spectrum_mr.o \
- spectrum_qdisc.o spectrum_span.o
+ spectrum_qdisc.o spectrum_span.o \
+ spectrum_nve.o spectrum_nve_vxlan.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 81533d7f395c..937d0ace699a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1055,6 +1055,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
err_driver_init:
mlxsw_thermal_fini(mlxsw_core->thermal);
err_thermal_init:
+ mlxsw_hwmon_fini(mlxsw_core->hwmon);
err_hwmon_init:
if (!reload)
devlink_unregister(devlink);
@@ -1088,6 +1089,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (mlxsw_core->driver->fini)
mlxsw_core->driver->fini(mlxsw_core);
mlxsw_thermal_fini(mlxsw_core->thermal);
+ mlxsw_hwmon_fini(mlxsw_core->hwmon);
if (!reload)
devlink_unregister(devlink);
mlxsw_emad_fini(mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 655ddd204ab2..c35be477856f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -359,6 +359,10 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
return 0;
}
+static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+}
+
#endif
struct mlxsw_thermal;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index f6cf2896d337..e04e8162aa14 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -303,8 +303,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
struct device *hwmon_dev;
int err;
- mlxsw_hwmon = devm_kzalloc(mlxsw_bus_info->dev, sizeof(*mlxsw_hwmon),
- GFP_KERNEL);
+ mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL);
if (!mlxsw_hwmon)
return -ENOMEM;
mlxsw_hwmon->core = mlxsw_core;
@@ -321,10 +320,9 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
- hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev,
- "mlxsw",
- mlxsw_hwmon,
- mlxsw_hwmon->groups);
+ hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev,
+ "mlxsw", mlxsw_hwmon,
+ mlxsw_hwmon->groups);
if (IS_ERR(hwmon_dev)) {
err = PTR_ERR(hwmon_dev);
goto err_hwmon_register;
@@ -337,5 +335,12 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
err_hwmon_register:
err_fans_init:
err_temp_init:
+ kfree(mlxsw_hwmon);
return err;
}
+
+void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+ hwmon_device_unregister(mlxsw_hwmon->hwmon_dev);
+ kfree(mlxsw_hwmon);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ed7e4c4e7403..8a4983adae94 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2994,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
goto err_port_qdiscs_init;
}
+ err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_nve_init;
+ }
+
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
if (IS_ERR(mlxsw_sp_port_vlan)) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
@@ -3022,6 +3029,8 @@ err_register_netdev:
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
err_port_vlan_get:
+ mlxsw_sp_port_nve_fini(mlxsw_sp_port);
+err_port_nve_init:
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
err_port_qdiscs_init:
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
@@ -3061,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
mlxsw_sp->ports[local_port] = NULL;
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+ mlxsw_sp_port_nve_fini(mlxsw_sp_port);
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
@@ -3795,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
+ err = mlxsw_sp_nve_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n");
+ goto err_nve_init;
+ }
+
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
@@ -3841,6 +3857,8 @@ err_acl_init:
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_nve_fini(mlxsw_sp);
+err_nve_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3873,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+ mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -3887,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+ mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -3900,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_acl_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_nve_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4566,6 +4587,41 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
}
+static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev)
+{
+ unsigned int num_vxlans = 0;
+ struct net_device *dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(br_dev, dev, iter) {
+ if (netif_is_vxlan(dev))
+ num_vxlans++;
+ }
+
+ return num_vxlans > 1;
+}
+
+static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
+ struct netlink_ext_ack *extack)
+{
+ if (br_multicast_enabled(br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device");
+ return false;
+ }
+
+ if (br_vlan_enabled(br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device");
+ return false;
+ }
+
+ if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge");
+ return false;
+ }
+
+ return true;
+}
+
static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
struct net_device *dev,
unsigned long event, void *ptr)
@@ -4595,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
}
if (!info->linking)
break;
+ if (netif_is_bridge_master(upper_dev) &&
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+ mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+ !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+ return -EOPNOTSUPP;
if (netdev_has_any_upper_dev(upper_dev) &&
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4752,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
}
if (!info->linking)
break;
+ if (netif_is_bridge_master(upper_dev) &&
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+ mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+ !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+ return -EOPNOTSUPP;
if (netdev_has_any_upper_dev(upper_dev) &&
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4898,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
return netif_is_l3_master(info->upper_dev);
}
+static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *cu_info;
+ struct netdev_notifier_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper_dev;
+
+ extack = netdev_notifier_info_to_extack(info);
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ cu_info = container_of(info,
+ struct netdev_notifier_changeupper_info,
+ info);
+ upper_dev = cu_info->upper_dev;
+ if (!netif_is_bridge_master(upper_dev))
+ return 0;
+ if (!mlxsw_sp_lower_get(upper_dev))
+ return 0;
+ if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+ return -EOPNOTSUPP;
+ if (cu_info->linking) {
+ if (!netif_running(dev))
+ return 0;
+ return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
+ dev, extack);
+ } else {
+ mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
+ }
+ break;
+ case NETDEV_PRE_UP:
+ upper_dev = netdev_master_upper_dev_get(dev);
+ if (!upper_dev)
+ return 0;
+ if (!netif_is_bridge_master(upper_dev))
+ return 0;
+ if (!mlxsw_sp_lower_get(upper_dev))
+ return 0;
+ return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev,
+ extack);
+ case NETDEV_DOWN:
+ upper_dev = netdev_master_upper_dev_get(dev);
+ if (!upper_dev)
+ return 0;
+ if (!netif_is_bridge_master(upper_dev))
+ return 0;
+ if (!mlxsw_sp_lower_get(upper_dev))
+ return 0;
+ mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
+ break;
+ }
+
+ return 0;
+}
+
static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -4914,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
}
mlxsw_sp_span_respin(mlxsw_sp);
+ if (netif_is_vxlan(dev))
+ err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 1f68ac2a20f4..0875a79cbe7b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -16,6 +16,7 @@
#include <net/psample.h>
#include <net/pkt_cls.h>
#include <net/red.h>
+#include <net/vxlan.h>
#include "port.h"
#include "core.h"
@@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id {
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
struct mlxsw_sp_span_entry;
+enum mlxsw_sp_l3proto;
+union mlxsw_sp_l3addr;
struct mlxsw_sp_upper {
struct net_device *dev;
@@ -113,9 +116,11 @@ struct mlxsw_sp_acl;
struct mlxsw_sp_counter_pool;
struct mlxsw_sp_fid_core;
struct mlxsw_sp_kvdl;
+struct mlxsw_sp_nve;
struct mlxsw_sp_kvdl_ops;
struct mlxsw_sp_mr_tcam_ops;
struct mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_nve_ops;
struct mlxsw_sp {
struct mlxsw_sp_port **ports;
@@ -132,6 +137,7 @@ struct mlxsw_sp {
struct mlxsw_sp_acl *acl;
struct mlxsw_sp_fid_core *fid_core;
struct mlxsw_sp_kvdl *kvdl;
+ struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
struct mlxsw_sp_counter_pool *counter_pool;
@@ -146,6 +152,7 @@ struct mlxsw_sp {
const struct mlxsw_afk_ops *afk_ops;
const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
+ const struct mlxsw_sp_nve_ops **nve_ops_arr;
};
static inline struct mlxsw_sp_upper *
@@ -235,6 +242,25 @@ struct mlxsw_sp_port {
struct mlxsw_sp_acl_block *eg_acl_block;
};
+static inline struct net_device *
+mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(br_dev, dev, iter) {
+ if (netif_is_vxlan(dev))
+ return dev;
+ }
+
+ return NULL;
+}
+
+static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev)
+{
+ return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev);
+}
+
static inline bool
mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
{
@@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *br_dev);
bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev);
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev);
/* spectrum.c */
int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -431,6 +464,15 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip,
+ u32 tunnel_index);
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip);
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ u16 *vr_id);
/* spectrum_kvdl.c */
enum mlxsw_sp_kvdl_entry_type {
@@ -679,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
/* spectrum_fid.c */
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+ __be32 vni);
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni);
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index);
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid);
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni);
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid);
int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
enum mlxsw_sp_flood_type packet_type, u8 local_port,
bool member);
@@ -697,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
int br_ifindex);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+ int br_ifindex);
struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp);
@@ -742,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
/* spectrum2_mr_tcam.c */
extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
+/* spectrum_nve.c */
+enum mlxsw_sp_nve_type {
+ MLXSW_SP_NVE_TYPE_VXLAN,
+};
+
+struct mlxsw_sp_nve_params {
+ enum mlxsw_sp_nve_type type;
+ __be32 vni;
+ const struct net_device *dev;
+};
+
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[];
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[];
+
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr);
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr);
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id, __be32 addr);
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_params *params,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid);
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index 715d24ff937e..a3db033d7399 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -6,6 +6,7 @@
#include <linux/if_vlan.h>
#include <linux/if_bridge.h>
#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
#include <linux/rtnetlink.h>
#include "spectrum.h"
@@ -14,6 +15,7 @@
struct mlxsw_sp_fid_family;
struct mlxsw_sp_fid_core {
+ struct rhashtable vni_ht;
struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
unsigned int *port_fid_mappings;
};
@@ -24,6 +26,12 @@ struct mlxsw_sp_fid {
unsigned int ref_count;
u16 fid_index;
struct mlxsw_sp_fid_family *fid_family;
+
+ struct rhash_head vni_ht_node;
+ __be32 vni;
+ u32 nve_flood_index;
+ u8 vni_valid:1,
+ nve_flood_index_valid:1;
};
struct mlxsw_sp_fid_8021q {
@@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d {
int br_ifindex;
};
+static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
+ .key_len = sizeof_field(struct mlxsw_sp_fid, vni),
+ .key_offset = offsetof(struct mlxsw_sp_fid, vni),
+ .head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node),
+};
+
struct mlxsw_sp_flood_table {
enum mlxsw_sp_flood_type packet_type;
enum mlxsw_reg_sfgc_bridge_type bridge_type;
@@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops {
struct mlxsw_sp_port *port, u16 vid);
void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
struct mlxsw_sp_port *port, u16 vid);
+ int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+ void (*vni_clear)(struct mlxsw_sp_fid *fid);
+ int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index);
+ void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
};
struct mlxsw_sp_fid_family {
@@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
[MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
};
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+ __be32 vni)
+{
+ struct mlxsw_sp_fid *fid;
+
+ fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni,
+ mlxsw_sp_fid_vni_ht_params);
+ if (fid)
+ fid->ref_count++;
+
+ return fid;
+}
+
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni)
+{
+ if (!fid->vni_valid)
+ return -EINVAL;
+
+ *vni = fid->vni;
+
+ return 0;
+}
+
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ int err;
+
+ if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+ return -EINVAL;
+
+ err = ops->nve_flood_index_set(fid, nve_flood_index);
+ if (err)
+ return err;
+
+ fid->nve_flood_index = nve_flood_index;
+ fid->nve_flood_index_valid = true;
+
+ return 0;
+}
+
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+ if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+ return;
+
+ fid->nve_flood_index_valid = false;
+ ops->nve_flood_index_clear(fid);
+}
+
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
+{
+ return fid->nve_flood_index_valid;
+}
+
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+ int err;
+
+ if (WARN_ON(!ops->vni_set || fid->vni_valid))
+ return -EINVAL;
+
+ fid->vni = vni;
+ err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
+ &fid->vni_ht_node,
+ mlxsw_sp_fid_vni_ht_params);
+ if (err)
+ return err;
+
+ err = ops->vni_set(fid, vni);
+ if (err)
+ goto err_vni_set;
+
+ fid->vni_valid = true;
+
+ return 0;
+
+err_vni_set:
+ rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+ mlxsw_sp_fid_vni_ht_params);
+ return err;
+}
+
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+
+ if (WARN_ON(!ops->vni_clear || !fid->vni_valid))
+ return;
+
+ fid->vni_valid = false;
+ ops->vni_clear(fid);
+ rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+ mlxsw_sp_fid_vni_ht_params);
+}
+
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid)
+{
+ return fid->vni_valid;
+}
+
static const struct mlxsw_sp_flood_table *
mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid,
enum mlxsw_sp_flood_type packet_type)
@@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
}
+static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+ __be32 vni, bool vni_valid, u32 nve_flood_index,
+ bool nve_flood_index_valid)
+{
+ char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+ mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index,
+ 0);
+ mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid);
+ mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni));
+ mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid);
+ mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
u16 vid, bool valid)
{
@@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid)
static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid)
{
+ if (fid->vni_valid)
+ mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid);
mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
}
@@ -531,6 +678,41 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid,
mlxsw_sp_port->local_port, vid, false);
}
+static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni,
+ true, fid->nve_flood_index,
+ fid->nve_flood_index_valid);
+}
+
+static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false,
+ fid->nve_flood_index, fid->nve_flood_index_valid);
+}
+
+static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index,
+ fid->vni, fid->vni_valid, nve_flood_index,
+ true);
+}
+
+static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni,
+ fid->vni_valid, 0, false);
+}
+
static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
.setup = mlxsw_sp_fid_8021d_setup,
.configure = mlxsw_sp_fid_8021d_configure,
@@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
.flood_index = mlxsw_sp_fid_8021d_flood_index,
.port_vid_map = mlxsw_sp_fid_8021d_port_vid_map,
.port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap,
+ .vni_set = mlxsw_sp_fid_8021d_vni_set,
+ .vni_clear = mlxsw_sp_fid_8021d_vni_clear,
+ .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set,
+ .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear,
};
static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = {
@@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = {
[MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family,
};
-static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
- enum mlxsw_sp_fid_type type,
- const void *arg)
+static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type,
+ const void *arg)
{
struct mlxsw_sp_fid_family *fid_family;
struct mlxsw_sp_fid *fid;
- u16 fid_index;
- int err;
fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
list_for_each_entry(fid, &fid_family->fids_list, list) {
@@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
return fid;
}
+ return NULL;
+}
+
+static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type,
+ const void *arg)
+{
+ struct mlxsw_sp_fid_family *fid_family;
+ struct mlxsw_sp_fid *fid;
+ u16 fid_index;
+ int err;
+
+ fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg);
+ if (fid)
+ return fid;
+
+ fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
fid = kzalloc(fid_family->fid_size, GFP_KERNEL);
if (!fid)
return ERR_PTR(-ENOMEM);
@@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex);
}
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+ int br_ifindex)
+{
+ return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D,
+ &br_ifindex);
+}
+
struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
u16 rif_index)
{
@@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp)
return -ENOMEM;
mlxsw_sp->fid_core = fid_core;
+ err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int),
GFP_KERNEL);
if (!fid_core->port_fid_mappings) {
@@ -944,6 +1156,8 @@ err_fid_ops_register:
}
kfree(fid_core->port_fid_mappings);
err_alloc_port_fid_mappings:
+ rhashtable_destroy(&fid_core->vni_ht);
+err_rhashtable_init:
kfree(fid_core);
return err;
}
@@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_fid_family_unregister(mlxsw_sp,
fid_core->fid_family_arr[i]);
kfree(fid_core->port_fid_mappings);
+ rhashtable_destroy(&fid_core->vni_ht);
kfree(fid_core);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
new file mode 100644
index 000000000000..ad06d9969bc1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -0,0 +1,982 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_nve.h"
+
+const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = {
+ [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops,
+};
+
+const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = {
+ [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops,
+};
+
+struct mlxsw_sp_nve_mc_entry;
+struct mlxsw_sp_nve_mc_record;
+struct mlxsw_sp_nve_mc_list;
+
+struct mlxsw_sp_nve_mc_record_ops {
+ enum mlxsw_reg_tnumt_record_type type;
+ int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr);
+ void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry);
+ void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ char *tnumt_pl, unsigned int entry_index);
+ bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr);
+};
+
+struct mlxsw_sp_nve_mc_list_key {
+ u16 fid_index;
+};
+
+struct mlxsw_sp_nve_mc_ipv6_entry {
+ struct in6_addr addr6;
+ u32 addr6_kvdl_index;
+};
+
+struct mlxsw_sp_nve_mc_entry {
+ union {
+ __be32 addr4;
+ struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry;
+ };
+ u8 valid:1;
+};
+
+struct mlxsw_sp_nve_mc_record {
+ struct list_head list;
+ enum mlxsw_sp_l3proto proto;
+ unsigned int num_entries;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_nve_mc_list *mc_list;
+ const struct mlxsw_sp_nve_mc_record_ops *ops;
+ u32 kvdl_index;
+ struct mlxsw_sp_nve_mc_entry entries[0];
+};
+
+struct mlxsw_sp_nve_mc_list {
+ struct list_head records_list;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_nve_mc_list_key key;
+};
+
+static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key),
+ .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key),
+ .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node),
+};
+
+static int
+mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ mc_entry->addr4 = addr->addr4;
+
+ return 0;
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ char *tnumt_pl, unsigned int entry_index)
+{
+ u32 udip = be32_to_cpu(mc_entry->addr4);
+
+ mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ return mc_entry->addr4 == addr->addr4;
+}
+
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv4_ops = {
+ .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
+ .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add,
+ .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del,
+ .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set,
+ .entry_compare = &mlxsw_sp_nve_mc_record_ipv4_entry_compare,
+};
+
+static int
+mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ WARN_ON(1);
+
+ return -EINVAL;
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ char *tnumt_pl, unsigned int entry_index)
+{
+ u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index;
+
+ mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6);
+}
+
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv6_ops = {
+ .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6,
+ .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add,
+ .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del,
+ .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set,
+ .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare,
+};
+
+static const struct mlxsw_sp_nve_mc_record_ops *
+mlxsw_sp_nve_mc_record_ops_arr[] = {
+ [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops,
+ [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops,
+};
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_mc_list_key *key)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+ return rhashtable_lookup_fast(&nve->mc_list_ht, key,
+ mlxsw_sp_nve_mc_list_ht_params);
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_mc_list_key *key)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ struct mlxsw_sp_nve_mc_list *mc_list;
+ int err;
+
+ mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL);
+ if (!mc_list)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&mc_list->records_list);
+ mc_list->key = *key;
+
+ err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node,
+ mlxsw_sp_nve_mc_list_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ return mc_list;
+
+err_rhashtable_insert:
+ kfree(mc_list);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+ rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node,
+ mlxsw_sp_nve_mc_list_ht_params);
+ WARN_ON(!list_empty(&mc_list->records_list));
+ kfree(mc_list);
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_mc_list_key *key)
+{
+ struct mlxsw_sp_nve_mc_list *mc_list;
+
+ mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key);
+ if (mc_list)
+ return mc_list;
+
+ return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key);
+}
+
+static void
+mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ if (!list_empty(&mc_list->records_list))
+ return;
+ mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto)
+{
+ unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto];
+ struct mlxsw_sp_nve_mc_record *mc_record;
+ int err;
+
+ mc_record = kzalloc(sizeof(*mc_record) + num_max_entries *
+ sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL);
+ if (!mc_record)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+ &mc_record->kvdl_index);
+ if (err)
+ goto err_kvdl_alloc;
+
+ mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto];
+ mc_record->mlxsw_sp = mlxsw_sp;
+ mc_record->mc_list = mc_list;
+ mc_record->proto = proto;
+ list_add_tail(&mc_record->list, &mc_list->records_list);
+
+ return mc_record;
+
+err_kvdl_alloc:
+ kfree(mc_record);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+
+ list_del(&mc_record->list);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+ mc_record->kvdl_index);
+ WARN_ON(mc_record->num_entries);
+ kfree(mc_record);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) {
+ unsigned int num_entries = mc_record->num_entries;
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+ if (mc_record->proto == proto &&
+ num_entries < nve->num_max_mc_entries[proto])
+ return mc_record;
+ }
+
+ return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto);
+}
+
+static void
+mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ if (mc_record->num_entries != 0)
+ return;
+
+ mlxsw_sp_nve_mc_record_destroy(mc_record);
+}
+
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+ unsigned int num_max_entries;
+ int i;
+
+ num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ if (mc_record->entries[i].valid)
+ continue;
+ return &mc_record->entries[i];
+ }
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type;
+ struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+ struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+ char tnumt_pl[MLXSW_REG_TNUMT_LEN];
+ unsigned int num_max_entries;
+ unsigned int num_entries = 0;
+ u32 next_kvdl_index = 0;
+ bool next_valid = false;
+ int i;
+
+ if (!list_is_last(&mc_record->list, &mc_list->records_list)) {
+ struct mlxsw_sp_nve_mc_record *next_record;
+
+ next_record = list_next_entry(mc_record, list);
+ next_kvdl_index = next_record->kvdl_index;
+ next_valid = true;
+ }
+
+ mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
+ mc_record->kvdl_index, next_valid,
+ next_kvdl_index, mc_record->num_entries);
+
+ num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+ mc_entry = &mc_record->entries[i];
+ if (!mc_entry->valid)
+ continue;
+ mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl,
+ num_entries++);
+ }
+
+ WARN_ON(num_entries != mc_record->num_entries);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+ struct mlxsw_sp_nve_mc_record *first_record;
+
+ first_record = list_first_entry(&mc_list->records_list,
+ struct mlxsw_sp_nve_mc_record, list);
+
+ return mc_record == first_record;
+}
+
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+ unsigned int num_max_entries;
+ int i;
+
+ num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+ mc_entry = &mc_record->entries[i];
+ if (!mc_entry->valid)
+ continue;
+ if (mc_record->ops->entry_compare(mc_record, mc_entry, addr))
+ return mc_entry;
+ }
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_entry *mc_entry = NULL;
+ int err;
+
+ mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record);
+ if (WARN_ON(!mc_entry))
+ return -EINVAL;
+
+ err = mc_record->ops->entry_add(mc_record, mc_entry, addr);
+ if (err)
+ return err;
+ mc_record->num_entries++;
+ mc_entry->valid = true;
+
+ err = mlxsw_sp_nve_mc_record_refresh(mc_record);
+ if (err)
+ goto err_record_refresh;
+
+ /* If this is a new record and not the first one, then we need to
+ * update the next pointer of the previous entry
+ */
+ if (mc_record->num_entries != 1 ||
+ mlxsw_sp_nve_mc_record_is_first(mc_record))
+ return 0;
+
+ err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list));
+ if (err)
+ goto err_prev_record_refresh;
+
+ return 0;
+
+err_prev_record_refresh:
+err_record_refresh:
+ mc_entry->valid = false;
+ mc_record->num_entries--;
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+ struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+
+ mc_entry->valid = false;
+ mc_record->num_entries--;
+
+ /* When the record continues to exist we only need to invalidate
+ * the requested entry
+ */
+ if (mc_record->num_entries != 0) {
+ mlxsw_sp_nve_mc_record_refresh(mc_record);
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return;
+ }
+
+ /* If the record needs to be deleted, but it is not the first,
+ * then we need to make sure that the previous record no longer
+ * points to it. Remove deleted record from the list to reflect
+ * that and then re-add it at the end, so that it could be
+ * properly removed by the record destruction code
+ */
+ if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) {
+ struct mlxsw_sp_nve_mc_record *prev_record;
+
+ prev_record = list_prev_entry(mc_record, list);
+ list_del(&mc_record->list);
+ mlxsw_sp_nve_mc_record_refresh(prev_record);
+ list_add_tail(&mc_record->list, &mc_list->records_list);
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return;
+ }
+
+ /* If the first record needs to be deleted, but the list is not
+ * singular, then the second record needs to be written in the
+ * first record's address, as this address is stored as a property
+ * of the FID
+ */
+ if (mlxsw_sp_nve_mc_record_is_first(mc_record) &&
+ !list_is_singular(&mc_list->records_list)) {
+ struct mlxsw_sp_nve_mc_record *next_record;
+
+ next_record = list_next_entry(mc_record, list);
+ swap(mc_record->kvdl_index, next_record->kvdl_index);
+ mlxsw_sp_nve_mc_record_refresh(next_record);
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return;
+ }
+
+ /* This is the last case where the last remaining record needs to
+ * be deleted. Simply delete the entry
+ */
+ mc_record->ops->entry_del(mc_record, mc_entry);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr,
+ struct mlxsw_sp_nve_mc_entry **mc_entry)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ list_for_each_entry(mc_record, &mc_list->records_list, list) {
+ if (mc_record->proto != proto)
+ continue;
+
+ *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr);
+ if (*mc_entry)
+ return mc_record;
+ }
+
+ return NULL;
+}
+
+static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+ int err;
+
+ mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto);
+ if (IS_ERR(mc_record))
+ return PTR_ERR(mc_record);
+
+ err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr);
+ if (err)
+ goto err_ip_add;
+
+ return 0;
+
+err_ip_add:
+ mlxsw_sp_nve_mc_record_put(mc_record);
+ return err;
+}
+
+static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+ struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+ mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr,
+ &mc_entry);
+ if (WARN_ON(!mc_record))
+ return;
+
+ mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
+ mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+static int
+mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ /* The address of the first record in the list is a property of
+ * the FID and we never change it. It only needs to be set when
+ * a new list is created
+ */
+ if (mlxsw_sp_fid_nve_flood_index_is_set(fid))
+ return 0;
+
+ mc_record = list_first_entry(&mc_list->records_list,
+ struct mlxsw_sp_nve_mc_record, list);
+
+ return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index);
+}
+
+static void
+mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ /* The address of the first record needs to be invalidated only when
+ * the last record is about to be removed
+ */
+ if (!list_is_singular(&mc_list->records_list))
+ return;
+
+ mc_record = list_first_entry(&mc_list->records_list,
+ struct mlxsw_sp_nve_mc_record, list);
+ if (mc_record->num_entries != 1)
+ return;
+
+ return mlxsw_sp_fid_nve_flood_index_clear(fid);
+}
+
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_list_key key = { 0 };
+ struct mlxsw_sp_nve_mc_list *mc_list;
+ int err;
+
+ key.fid_index = mlxsw_sp_fid_index(fid);
+ mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key);
+ if (IS_ERR(mc_list))
+ return PTR_ERR(mc_list);
+
+ err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr);
+ if (err)
+ goto err_add_ip;
+
+ err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list);
+ if (err)
+ goto err_fid_flood_index_set;
+
+ return 0;
+
+err_fid_flood_index_set:
+ mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+err_add_ip:
+ mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+ return err;
+}
+
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_list_key key = { 0 };
+ struct mlxsw_sp_nve_mc_list *mc_list;
+
+ key.fid_index = mlxsw_sp_fid_index(fid);
+ mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+ if (WARN_ON(!mc_list))
+ return;
+
+ mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list);
+ mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+ mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+static void
+mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+ unsigned int num_max_entries;
+ int i;
+
+ num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i];
+
+ if (!mc_entry->valid)
+ continue;
+ mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
+ }
+
+ WARN_ON(mc_record->num_entries);
+ mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record, *tmp;
+ struct mlxsw_sp_nve_mc_list_key key = { 0 };
+ struct mlxsw_sp_nve_mc_list *mc_list;
+
+ if (!mlxsw_sp_fid_nve_flood_index_is_set(fid))
+ return;
+
+ mlxsw_sp_fid_nve_flood_index_clear(fid);
+
+ key.fid_index = mlxsw_sp_fid_index(fid);
+ mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+ if (WARN_ON(!mc_list))
+ return;
+
+ list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list)
+ mlxsw_sp_nve_mc_record_delete(mc_record);
+
+ WARN_ON(!list_empty(&mc_list->records_list));
+ mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
+{
+ WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0);
+
+ return mlxsw_sp->nve->tunnel_index;
+}
+
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id, __be32 addr)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ struct mlxsw_sp_nve_config *config = &nve->config;
+
+ if (nve->num_nve_tunnels &&
+ config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 &&
+ config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id)
+ return true;
+
+ return false;
+}
+
+static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_config *config)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ const struct mlxsw_sp_nve_ops *ops;
+ int err;
+
+ if (nve->num_nve_tunnels++ != 0)
+ return 0;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+ &nve->tunnel_index);
+ if (err)
+ goto err_kvdl_alloc;
+
+ ops = nve->nve_ops_arr[config->type];
+ err = ops->init(nve, config);
+ if (err)
+ goto err_ops_init;
+
+ return 0;
+
+err_ops_init:
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+ nve->tunnel_index);
+err_kvdl_alloc:
+ nve->num_nve_tunnels--;
+ return err;
+}
+
+static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ const struct mlxsw_sp_nve_ops *ops;
+
+ ops = nve->nve_ops_arr[nve->config.type];
+
+ if (mlxsw_sp->nve->num_nve_tunnels == 1) {
+ ops->fini(nve);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+ nve->tunnel_index);
+ }
+ nve->num_nve_tunnels--;
+}
+
+static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp,
+ u16 fid_index)
+{
+ char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+ mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID);
+ mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ const struct mlxsw_sp_nve_ops *ops;
+ struct mlxsw_sp_nve_config config;
+ int err;
+
+ ops = nve->nve_ops_arr[params->type];
+
+ if (!ops->can_offload(nve, params->dev, extack))
+ return -EOPNOTSUPP;
+
+ memset(&config, 0, sizeof(config));
+ ops->nve_config(nve, params->dev, &config);
+ if (nve->num_nve_tunnels &&
+ memcmp(&config, &nve->config, sizeof(config))) {
+ NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel");
+ return err;
+ }
+
+ err = mlxsw_sp_fid_vni_set(fid, params->vni);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID");
+ goto err_fid_vni_set;
+ }
+
+ nve->config = config;
+
+ return 0;
+
+err_fid_vni_set:
+ mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+ return err;
+}
+
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid)
+{
+ u16 fid_index = mlxsw_sp_fid_index(fid);
+
+ mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid);
+ mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index);
+ mlxsw_sp_fid_vni_clear(fid);
+ mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+}
+
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char tnqdr_pl[MLXSW_REG_TNQDR_LEN];
+
+ mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl);
+}
+
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+}
+
+static int mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char tnqcr_pl[MLXSW_REG_TNQCR_LEN];
+
+ mlxsw_reg_tnqcr_pack(tnqcr_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl);
+}
+
+static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ /* Iterate over inner ECN values */
+ for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
+ u8 outer_ecn = INET_ECN_encapsulate(0, i);
+ char tneem_pl[MLXSW_REG_TNEEM_LEN];
+ int err;
+
+ mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem),
+ tneem_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp,
+ u8 inner_ecn, u8 outer_ecn)
+{
+ char tndem_pl[MLXSW_REG_TNDEM_LEN];
+ bool trap_en, set_ce = false;
+ u8 new_inner_ecn;
+
+ trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
+ new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn;
+
+ mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn,
+ trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl);
+}
+
+static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ /* Iterate over inner ECN values */
+ for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
+ int j;
+
+ /* Iterate over outer ECN values */
+ for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) {
+ int err;
+
+ err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp);
+ if (err)
+ return err;
+
+ return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp);
+}
+
+static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned int max;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6))
+ return -EIO;
+ max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4);
+ mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max;
+ max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6);
+ mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max;
+
+ return 0;
+}
+
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nve *nve;
+ int err;
+
+ nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL);
+ if (!nve)
+ return -ENOMEM;
+ mlxsw_sp->nve = nve;
+ nve->mlxsw_sp = mlxsw_sp;
+ nve->nve_ops_arr = mlxsw_sp->nve_ops_arr;
+
+ err = rhashtable_init(&nve->mc_list_ht,
+ &mlxsw_sp_nve_mc_list_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ err = mlxsw_sp_nve_qos_init(mlxsw_sp);
+ if (err)
+ goto err_nve_qos_init;
+
+ err = mlxsw_sp_nve_ecn_init(mlxsw_sp);
+ if (err)
+ goto err_nve_ecn_init;
+
+ err = mlxsw_sp_nve_resources_query(mlxsw_sp);
+ if (err)
+ goto err_nve_resources_query;
+
+ return 0;
+
+err_nve_resources_query:
+err_nve_ecn_init:
+err_nve_qos_init:
+ rhashtable_destroy(&nve->mc_list_ht);
+err_rhashtable_init:
+ mlxsw_sp->nve = NULL;
+ kfree(nve);
+ return err;
+}
+
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ WARN_ON(mlxsw_sp->nve->num_nve_tunnels);
+ rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht);
+ mlxsw_sp->nve = NULL;
+ kfree(mlxsw_sp->nve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
new file mode 100644
index 000000000000..4cc3297e13d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_NVE_H
+#define _MLXSW_SPECTRUM_NVE_H
+
+#include <linux/netlink.h>
+#include <linux/rhashtable.h>
+
+#include "spectrum.h"
+
+struct mlxsw_sp_nve_config {
+ enum mlxsw_sp_nve_type type;
+ u8 ttl;
+ u8 learning_en:1;
+ __be16 udp_dport;
+ __be32 flowlabel;
+ u32 ul_tb_id;
+ enum mlxsw_sp_l3proto ul_proto;
+ union mlxsw_sp_l3addr ul_sip;
+};
+
+struct mlxsw_sp_nve {
+ struct mlxsw_sp_nve_config config;
+ struct rhashtable mc_list_ht;
+ struct mlxsw_sp *mlxsw_sp;
+ const struct mlxsw_sp_nve_ops **nve_ops_arr;
+ unsigned int num_nve_tunnels; /* Protected by RTNL */
+ unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
+ u32 tunnel_index;
+};
+
+struct mlxsw_sp_nve_ops {
+ enum mlxsw_sp_nve_type type;
+ bool (*can_offload)(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct netlink_ext_ack *extack);
+ void (*nve_config)(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct mlxsw_sp_nve_config *config);
+ int (*init)(struct mlxsw_sp_nve *nve,
+ const struct mlxsw_sp_nve_config *config);
+ void (*fini)(struct mlxsw_sp_nve *nve);
+};
+
+extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops;
+extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
new file mode 100644
index 000000000000..d21c7be5b1c9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <net/vxlan.h>
+
+#include "reg.h"
+#include "spectrum_nve.h"
+
+/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
+ *
+ * In the worst case - where we have a VLAN tag on the outer Ethernet
+ * header and IPv6 in overlay and underlay - we need to parse 128 bytes
+ */
+#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
+#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
+
+#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX
+
+static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_config *cfg = &vxlan->cfg;
+
+ if (cfg->saddr.sa.sa_family != AF_INET) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
+ return false;
+ }
+
+ if (vxlan_addr_multicast(&cfg->remote_ip)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
+ return false;
+ }
+
+ if (vxlan_addr_any(&cfg->saddr)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified");
+ return false;
+ }
+
+ if (cfg->remote_ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported");
+ return false;
+ }
+
+ if (cfg->port_min || cfg->port_max) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported");
+ return false;
+ }
+
+ if (cfg->tos != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit");
+ return false;
+ }
+
+ if (cfg->flags & VXLAN_F_TTL_INHERIT) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit");
+ return false;
+ }
+
+ if (cfg->flags & VXLAN_F_LEARN) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported");
+ return false;
+ }
+
+ if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
+ return false;
+ }
+
+ if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
+ return false;
+ }
+
+ if (cfg->ttl == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0");
+ return false;
+ }
+
+ if (cfg->label != 0) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0");
+ return false;
+ }
+
+ return true;
+}
+
+static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct mlxsw_sp_nve_config *config)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_config *cfg = &vxlan->cfg;
+
+ config->type = MLXSW_SP_NVE_TYPE_VXLAN;
+ config->ttl = cfg->ttl;
+ config->flowlabel = cfg->label;
+ config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0;
+ config->ul_tb_id = RT_TABLE_MAIN;
+ config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
+ config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
+ config->udp_dport = cfg->dst_port;
+}
+
+static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+ unsigned int parsing_depth,
+ __be16 udp_dport)
+{
+ char mprs_pl[MLXSW_REG_MPRS_LEN];
+
+ mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+}
+
+static int
+mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_config *config)
+{
+ char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+ u16 ul_vr_id;
+ u8 udp_sport;
+ int err;
+
+ err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id,
+ &ul_vr_id);
+ if (err)
+ return err;
+
+ mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
+ config->ttl);
+ /* VxLAN driver's default UDP source port range is 32768 (0x8000)
+ * to 60999 (0xee47). Set the upper 8 bits of the UDP source port
+ * to a random number between 0x80 and 0xee
+ */
+ get_random_bytes(&udp_sport, sizeof(udp_sport));
+ udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
+ mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
+ mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en);
+ mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
+ mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
+{
+ char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+
+ mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
+
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
+ unsigned int tunnel_index)
+{
+ char rtdp_pl[MLXSW_REG_RTDP_LEN];
+
+ mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+ const struct mlxsw_sp_nve_config *config)
+{
+ struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+ int err;
+
+ err = mlxsw_sp_nve_parsing_set(mlxsw_sp,
+ MLXSW_SP_NVE_VXLAN_PARSING_DEPTH,
+ config->udp_dport);
+ if (err)
+ return err;
+
+ err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
+ if (err)
+ goto err_config_set;
+
+ err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index);
+ if (err)
+ goto err_rtdp_set;
+
+ err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
+ config->ul_proto,
+ &config->ul_sip,
+ nve->tunnel_index);
+ if (err)
+ goto err_promote_decap;
+
+ return 0;
+
+err_promote_decap:
+err_rtdp_set:
+ mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+err_config_set:
+ mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
+ config->udp_dport);
+ return err;
+}
+
+static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+ struct mlxsw_sp_nve_config *config = &nve->config;
+ struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+
+ mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
+ config->ul_proto, &config->ul_sip);
+ mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+ mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
+ config->udp_dport);
+}
+
+const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = {
+ .type = MLXSW_SP_NVE_TYPE_VXLAN,
+ .can_offload = mlxsw_sp1_nve_vxlan_can_offload,
+ .nve_config = mlxsw_sp_nve_vxlan_config,
+ .init = mlxsw_sp1_nve_vxlan_init,
+ .fini = mlxsw_sp1_nve_vxlan_fini,
+};
+
+static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ return false;
+}
+
+static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+ const struct mlxsw_sp_nve_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+}
+
+const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
+ .type = MLXSW_SP_NVE_TYPE_VXLAN,
+ .can_offload = mlxsw_sp2_nve_vxlan_can_offload,
+ .nve_config = mlxsw_sp_nve_vxlan_config,
+ .init = mlxsw_sp2_nve_vxlan_init,
+ .fini = mlxsw_sp2_nve_vxlan_fini,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2ab9cf25a08a..9e9bb57134f2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type {
* encapsulating entries.)
*/
MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
+ MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
struct mlxsw_sp_nexthop_group;
@@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ u16 *vr_id)
+{
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+ if (!vr)
+ return -ESRCH;
+ *vr_id = vr->id;
+
+ return 0;
+}
+
static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
enum mlxsw_sp_l3proto proto)
{
@@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ enum mlxsw_sp_l3proto proto,
+ const union mlxsw_sp_l3addr *addr,
+ enum mlxsw_sp_fib_entry_type type)
+{
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ unsigned char addr_prefix_len;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+ const void *addrp;
+ size_t addr_len;
+ u32 addr4;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, proto);
+
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ addr4 = be32_to_cpu(addr->addr4);
+ addrp = &addr4;
+ addr_len = 4;
+ addr_prefix_len = 32;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
+ addr_prefix_len);
+ if (!fib_node || list_empty(&fib_node->entry_list))
+ return NULL;
+
+ fib_entry = list_first_entry(&fib_node->entry_list,
+ struct mlxsw_sp_fib_entry, list);
+ if (fib_entry->type != type)
+ return NULL;
+
+ return fib_entry;
+}
+
/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
@@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip,
+ u32 tunnel_index)
+{
+ enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ int err;
+
+ /* It is valid to create a tunnel with a local IP and only later
+ * assign this IP address to a local interface
+ */
+ fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+ ul_proto, ul_sip,
+ type);
+ if (!fib_entry)
+ return 0;
+
+ fib_entry->decap.tunnel_index = tunnel_index;
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+ if (err)
+ goto err_fib_entry_update;
+
+ return 0;
+
+err_fib_entry_update:
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+ return err;
+}
+
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip)
+{
+ enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+ struct mlxsw_sp_fib_entry *fib_entry;
+
+ fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+ ul_proto, ul_sip,
+ type);
+ if (!fib_entry)
+ return;
+
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+}
+
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
@@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
return !!nh_group->nh_rif;
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
return true;
default:
return false;
@@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
int i;
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
- fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
+ fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
+ fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
return;
}
@@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
fib_entry->decap.tunnel_index);
}
+static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
+ fib_entry->decap.tunnel_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
@@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
fib_entry, op);
+ case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
+ return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
}
return -EINVAL;
}
@@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
+ u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
struct net_device *dev = fen_info->fi->fib_dev;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
@@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry,
ipip_entry);
}
+ if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
+ dip.addr4)) {
+ u32 t_index;
+
+ t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
+ fib_entry->decap.tunnel_index = t_index;
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+ return 0;
+ }
/* fall through */
case RTN_BROADCAST:
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index fa16ad2c6a50..bc60d7a8b49d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -15,6 +15,7 @@
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
#include <net/switchdev.h>
+#include <net/vxlan.h>
#include "spectrum_span.h"
#include "spectrum_switchdev.h"
@@ -83,9 +84,19 @@ struct mlxsw_sp_bridge_ops {
void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
struct mlxsw_sp_port *mlxsw_sp_port);
+ int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack);
+ void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev);
struct mlxsw_sp_fid *
(*fid_get)(struct mlxsw_sp_bridge_device *bridge_device,
u16 vid);
+ struct mlxsw_sp_fid *
+ (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid);
+ u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct mlxsw_sp_fid *fid);
};
static int
@@ -1236,6 +1247,51 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
MLXSW_REG_SFD_OP_WRITE_REMOVE;
}
+static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp,
+ const char *mac, u16 fid,
+ enum mlxsw_sp_l3proto proto,
+ const union mlxsw_sp_l3addr *addr,
+ bool adding, bool dynamic)
+{
+ enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto;
+ char *sfd_pl;
+ u8 num_rec;
+ u32 uip;
+ int err;
+
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ uip = be32_to_cpu(addr->addr4);
+ sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+ default:
+ WARN_ON(1);
+ return -EOPNOTSUPP;
+ }
+
+ sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+ if (!sfd_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0,
+ mlxsw_sp_sfd_rec_policy(dynamic), mac, fid,
+ MLXSW_REG_SFD_REC_ACTION_NOP, uip,
+ sfd_proto);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+
+out:
+ kfree(sfd_pl);
+ return err;
+}
+
static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
const char *mac, u16 fid, bool adding,
enum mlxsw_reg_sfd_rec_action action,
@@ -1949,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
}
+static int
+mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+static void
+mlxsw_sp_bridge_8021q_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev)
+{
+}
+
static struct mlxsw_sp_fid *
mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
u16 vid)
@@ -1958,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid);
}
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static u16
+mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct mlxsw_sp_fid *fid)
+{
+ return mlxsw_sp_fid_8021q_vid(fid);
+}
+
static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = {
.port_join = mlxsw_sp_bridge_8021q_port_join,
.port_leave = mlxsw_sp_bridge_8021q_port_leave,
+ .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join,
+ .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave,
.fid_get = mlxsw_sp_bridge_8021q_fid_get,
+ .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup,
+ .fid_vid = mlxsw_sp_bridge_8021q_fid_vid,
};
static bool
@@ -2025,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
}
+static int
+mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+ struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+ struct mlxsw_sp_nve_params params = {
+ .type = MLXSW_SP_NVE_TYPE_VXLAN,
+ .vni = vxlan->cfg.vni,
+ .dev = vxlan_dev,
+ };
+ struct mlxsw_sp_fid *fid;
+ int err;
+
+ fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+ if (!fid)
+ return -EINVAL;
+
+ if (mlxsw_sp_fid_vni_is_set(fid))
+ return -EINVAL;
+
+ err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
+ if (err)
+ goto err_nve_fid_enable;
+
+ /* The tunnel port does not hold a reference on the FID. Only
+ * local ports and the router port
+ */
+ mlxsw_sp_fid_put(fid);
+
+ return 0;
+
+err_nve_fid_enable:
+ mlxsw_sp_fid_put(fid);
+ return err;
+}
+
+static void
+mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+ struct mlxsw_sp_fid *fid;
+
+ fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+ if (WARN_ON(!fid))
+ return;
+
+ /* If the VxLAN device is down, then the FID does not have a VNI */
+ if (!mlxsw_sp_fid_vni_is_set(fid))
+ goto out;
+
+ mlxsw_sp_nve_fid_disable(mlxsw_sp, fid);
+out:
+ mlxsw_sp_fid_put(fid);
+}
+
static struct mlxsw_sp_fid *
mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
u16 vid)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+ struct net_device *vxlan_dev;
+ struct mlxsw_sp_fid *fid;
+ int err;
+
+ fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+ if (IS_ERR(fid))
+ return fid;
+
+ if (mlxsw_sp_fid_vni_is_set(fid))
+ return fid;
+
+ vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev);
+ if (!vxlan_dev)
+ return fid;
+
+ if (!netif_running(vxlan_dev))
+ return fid;
+
+ err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL);
+ if (err)
+ goto err_vxlan_join;
+
+ return fid;
+
+err_vxlan_join:
+ mlxsw_sp_fid_put(fid);
+ return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
- return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+ /* The only valid VLAN for a VLAN-unaware bridge is 0 */
+ if (vid)
+ return NULL;
+
+ return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+}
+
+static u16
+mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct mlxsw_sp_fid *fid)
+{
+ return 0;
}
static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
.port_join = mlxsw_sp_bridge_8021d_port_join,
.port_leave = mlxsw_sp_bridge_8021d_port_leave,
+ .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join,
+ .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave,
.fid_get = mlxsw_sp_bridge_8021d_fid_get,
+ .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup,
+ .fid_vid = mlxsw_sp_bridge_8021d_fid_vid,
};
int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -2087,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
}
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (WARN_ON(!bridge_device))
+ return -EINVAL;
+
+ return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack);
+}
+
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (WARN_ON(!bridge_device))
+ return;
+
+ bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev);
+}
+
static void
mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
const char *mac, u16 vid,
- struct net_device *dev)
+ struct net_device *dev, bool offloaded)
{
struct switchdev_notifier_fdb_info info;
info.addr = mac;
info.vid = vid;
+ info.offloaded = offloaded;
call_switchdev_notifiers(type, dev, &info.info);
}
@@ -2147,7 +2372,7 @@ do_fdb_op:
if (!do_notification)
return;
type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
- mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+ mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
return;
@@ -2207,7 +2432,7 @@ do_fdb_op:
if (!do_notification)
return;
type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
- mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+ mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
return;
@@ -2283,11 +2508,126 @@ out:
struct mlxsw_sp_switchdev_event_work {
struct work_struct work;
- struct switchdev_notifier_fdb_info fdb_info;
+ union {
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+ };
struct net_device *dev;
unsigned long event;
};
+static void
+mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
+ enum mlxsw_sp_l3proto *proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ if (vxlan_addr->sa.sa_family == AF_INET) {
+ addr->addr4 = vxlan_addr->sin.sin_addr.s_addr;
+ *proto = MLXSW_SP_L3_PROTO_IPV4;
+ } else {
+ addr->addr6 = vxlan_addr->sin6.sin6_addr;
+ *proto = MLXSW_SP_L3_PROTO_IPV6;
+ }
+}
+
+static void
+mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_switchdev_event_work *
+ switchdev_work,
+ struct mlxsw_sp_fid *fid, __be32 vni)
+{
+ struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct net_device *dev = switchdev_work->dev;
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr addr;
+ int err;
+
+ fdb_info = &switchdev_work->fdb_info;
+ err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info);
+ if (err)
+ return;
+
+ mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip,
+ &proto, &addr);
+
+ switch (switchdev_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+ vxlan_fdb_info.eth_addr,
+ mlxsw_sp_fid_index(fid),
+ proto, &addr, true, false);
+ if (err)
+ return;
+ vxlan_fdb_info.offloaded = true;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info.info);
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ vxlan_fdb_info.eth_addr,
+ fdb_info->vid, dev, true);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+ vxlan_fdb_info.eth_addr,
+ mlxsw_sp_fid_index(fid),
+ proto, &addr, false,
+ false);
+ vxlan_fdb_info.offloaded = false;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info.info);
+ break;
+ }
+}
+
+static void
+mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work *
+ switchdev_work)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = switchdev_work->dev;
+ struct net_device *br_dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_fid *fid;
+ __be32 vni;
+ int err;
+
+ if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE &&
+ switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE)
+ return;
+
+ if (!switchdev_work->fdb_info.added_by_user)
+ return;
+
+ if (!netif_running(dev))
+ return;
+ br_dev = netdev_master_upper_dev_get(dev);
+ if (!br_dev)
+ return;
+ if (!netif_is_bridge_master(br_dev))
+ return;
+ mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+ if (!mlxsw_sp)
+ return;
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+
+ fid = bridge_device->ops->fid_lookup(bridge_device,
+ switchdev_work->fdb_info.vid);
+ if (!fid)
+ return;
+
+ err = mlxsw_sp_fid_vni(fid, &vni);
+ if (err)
+ goto out;
+
+ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid,
+ vni);
+
+out:
+ mlxsw_sp_fid_put(fid);
+}
+
static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
{
struct mlxsw_sp_switchdev_event_work *switchdev_work =
@@ -2298,6 +2638,11 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
int err;
rtnl_lock();
+ if (netif_is_vxlan(dev)) {
+ mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work);
+ goto out;
+ }
+
mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
if (!mlxsw_sp_port)
goto out;
@@ -2312,7 +2657,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
break;
mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
fdb_info->addr,
- fdb_info->vid, dev);
+ fdb_info->vid, dev, true);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
@@ -2337,6 +2682,189 @@ out:
dev_put(dev);
}
+static void
+mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_switchdev_event_work *
+ switchdev_work)
+{
+ struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = switchdev_work->dev;
+ u8 all_zeros_mac[ETH_ALEN] = { 0 };
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr addr;
+ struct net_device *br_dev;
+ struct mlxsw_sp_fid *fid;
+ u16 vid;
+ int err;
+
+ vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+ br_dev = netdev_master_upper_dev_get(dev);
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+
+ fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+ if (!fid)
+ return;
+
+ mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+ &proto, &addr);
+
+ if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+ err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr);
+ if (err) {
+ mlxsw_sp_fid_put(fid);
+ return;
+ }
+ vxlan_fdb_info->offloaded = true;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info->info);
+ mlxsw_sp_fid_put(fid);
+ return;
+ }
+
+ /* The device has a single FDB table, whereas Linux has two - one
+ * in the bridge driver and another in the VxLAN driver. We only
+ * program an entry to the device if the MAC points to the VxLAN
+ * device in the bridge's FDB table
+ */
+ vid = bridge_device->ops->fid_vid(bridge_device, fid);
+ if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev)
+ goto err_br_fdb_find;
+
+ err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+ mlxsw_sp_fid_index(fid), proto,
+ &addr, true, false);
+ if (err)
+ goto err_fdb_tunnel_uc_op;
+ vxlan_fdb_info->offloaded = true;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info->info);
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ vxlan_fdb_info->eth_addr, vid, dev, true);
+
+ mlxsw_sp_fid_put(fid);
+
+ return;
+
+err_fdb_tunnel_uc_op:
+err_br_fdb_find:
+ mlxsw_sp_fid_put(fid);
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_switchdev_event_work *
+ switchdev_work)
+{
+ struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = switchdev_work->dev;
+ struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+ u8 all_zeros_mac[ETH_ALEN] = { 0 };
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr addr;
+ struct mlxsw_sp_fid *fid;
+ u16 vid;
+
+ vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+
+ fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+ if (!fid)
+ return;
+
+ mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+ &proto, &addr);
+
+ if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+ mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr);
+ mlxsw_sp_fid_put(fid);
+ return;
+ }
+
+ mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+ mlxsw_sp_fid_index(fid), proto, &addr,
+ false, false);
+ vid = bridge_device->ops->fid_vid(bridge_device, fid);
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ vxlan_fdb_info->eth_addr, vid, dev, false);
+
+ mlxsw_sp_fid_put(fid);
+}
+
+static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_switchdev_event_work *switchdev_work =
+ container_of(work, struct mlxsw_sp_switchdev_event_work, work);
+ struct net_device *dev = switchdev_work->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct net_device *br_dev;
+
+ rtnl_lock();
+
+ if (!netif_running(dev))
+ goto out;
+ br_dev = netdev_master_upper_dev_get(dev);
+ if (!br_dev)
+ goto out;
+ if (!netif_is_bridge_master(br_dev))
+ goto out;
+ mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ switch (switchdev_work->event) {
+ case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
+ mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work);
+ break;
+ case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+ mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work);
+ break;
+ }
+
+out:
+ rtnl_unlock();
+ kfree(switchdev_work);
+ dev_put(dev);
+}
+
+static int
+mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work *
+ switchdev_work,
+ struct switchdev_notifier_info *info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev);
+ struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+ struct vxlan_config *cfg = &vxlan->cfg;
+
+ vxlan_fdb_info = container_of(info,
+ struct switchdev_notifier_vxlan_fdb_info,
+ info);
+
+ if (vxlan_fdb_info->remote_port != cfg->dst_port)
+ return -EOPNOTSUPP;
+ if (vxlan_fdb_info->remote_vni != cfg->vni)
+ return -EOPNOTSUPP;
+ if (vxlan_fdb_info->vni != cfg->vni)
+ return -EOPNOTSUPP;
+ if (vxlan_fdb_info->remote_ifindex)
+ return -EOPNOTSUPP;
+ if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr))
+ return -EOPNOTSUPP;
+ if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip))
+ return -EOPNOTSUPP;
+
+ switchdev_work->vxlan_fdb_info = *vxlan_fdb_info;
+
+ return 0;
+}
+
/* Called under rcu_read_lock() */
static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
@@ -2346,6 +2874,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
struct switchdev_notifier_fdb_info *fdb_info;
struct switchdev_notifier_info *info = ptr;
struct net_device *br_dev;
+ int err;
/* Tunnel devices are not our uppers, so check their master instead */
br_dev = netdev_master_upper_dev_get_rcu(dev);
@@ -2386,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
*/
dev_hold(dev);
break;
+ case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */
+ case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+ INIT_WORK(&switchdev_work->work,
+ mlxsw_sp_switchdev_vxlan_fdb_event_work);
+ err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work,
+ info);
+ if (err)
+ goto err_vxlan_work_prepare;
+ dev_hold(dev);
+ break;
default:
kfree(switchdev_work);
return NOTIFY_DONE;
@@ -2395,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
return NOTIFY_DONE;
+err_vxlan_work_prepare:
err_addr_alloc:
kfree(switchdev_work);
return NOTIFY_BAD;
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 9b8a17ee3cb3..3238b9ee42f3 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -133,9 +133,9 @@ static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
{
unsigned int val, timeout = 10;
- /* Wait for the issued mac table command to be completed, or timeout.
- * When the command read from ANA_TABLES_MACACCESS is
- * MACACCESS_CMD_IDLE, the issued command completed successfully.
+ /* Wait for the issued vlan table command to be completed, or timeout.
+ * When the command read from ANA_TABLES_VLANACCESS is
+ * VLANACCESS_CMD_IDLE, the issued command completed successfully.
*/
do {
val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index f2b1938236fe..244dc261006e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -399,12 +399,14 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
switch (off) {
case offsetof(struct iphdr, daddr):
- set_ip_addr->ipv4_dst_mask = mask;
- set_ip_addr->ipv4_dst = exact;
+ set_ip_addr->ipv4_dst_mask |= mask;
+ set_ip_addr->ipv4_dst &= ~mask;
+ set_ip_addr->ipv4_dst |= exact & mask;
break;
case offsetof(struct iphdr, saddr):
- set_ip_addr->ipv4_src_mask = mask;
- set_ip_addr->ipv4_src = exact;
+ set_ip_addr->ipv4_src_mask |= mask;
+ set_ip_addr->ipv4_src &= ~mask;
+ set_ip_addr->ipv4_src |= exact & mask;
break;
default:
return -EOPNOTSUPP;
@@ -418,11 +420,12 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
}
static void
-nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask,
+nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask,
struct nfp_fl_set_ipv6_addr *ip6)
{
- ip6->ipv6[idx % 4].mask = mask;
- ip6->ipv6[idx % 4].exact = exact;
+ ip6->ipv6[word].mask |= mask;
+ ip6->ipv6[word].exact &= ~mask;
+ ip6->ipv6[word].exact |= exact & mask;
ip6->reserved = cpu_to_be16(0);
ip6->head.jump_id = opcode_tag;
@@ -435,6 +438,7 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
struct nfp_fl_set_ipv6_addr *ip_src)
{
__be32 exact, mask;
+ u8 word;
/* We are expecting tcf_pedit to return a big endian value */
mask = (__force __be32)~tcf_pedit_mask(action, idx);
@@ -443,17 +447,20 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
if (exact & ~mask)
return -EOPNOTSUPP;
- if (off < offsetof(struct ipv6hdr, saddr))
+ if (off < offsetof(struct ipv6hdr, saddr)) {
return -EOPNOTSUPP;
- else if (off < offsetof(struct ipv6hdr, daddr))
- nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx,
+ } else if (off < offsetof(struct ipv6hdr, daddr)) {
+ word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact);
+ nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word,
exact, mask, ip_src);
- else if (off < offsetof(struct ipv6hdr, daddr) +
- sizeof(struct in6_addr))
- nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx,
+ } else if (off < offsetof(struct ipv6hdr, daddr) +
+ sizeof(struct in6_addr)) {
+ word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact);
+ nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word,
exact, mask, ip_dst);
- else
+ } else {
return -EOPNOTSUPP;
+ }
return 0;
}
@@ -511,7 +518,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
struct nfp_fl_set_eth set_eth;
enum pedit_header_type htype;
int idx, nkeys, err;
- size_t act_size;
+ size_t act_size = 0;
u32 offset, cmd;
u8 ip_proto = 0;
@@ -569,7 +576,9 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
act_size = sizeof(set_eth);
memcpy(nfp_action, &set_eth, act_size);
*a_len += act_size;
- } else if (set_ip_addr.head.len_lw) {
+ }
+ if (set_ip_addr.head.len_lw) {
+ nfp_action += act_size;
act_size = sizeof(set_ip_addr);
memcpy(nfp_action, &set_ip_addr, act_size);
*a_len += act_size;
@@ -577,10 +586,12 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
/* Hardware will automatically fix IPv4 and TCP/UDP checksum. */
*csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR |
nfp_fl_csum_l4_to_flag(ip_proto);
- } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
+ }
+ if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
/* TC compiles set src and dst IPv6 address as a single action,
* the hardware requires this to be 2 separate actions.
*/
+ nfp_action += act_size;
act_size = sizeof(set_ip6_src);
memcpy(nfp_action, &set_ip6_src, act_size);
*a_len += act_size;
@@ -593,6 +604,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
/* Hardware will automatically fix TCP/UDP checksum. */
*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
} else if (set_ip6_dst.head.len_lw) {
+ nfp_action += act_size;
act_size = sizeof(set_ip6_dst);
memcpy(nfp_action, &set_ip6_dst, act_size);
*a_len += act_size;
@@ -600,13 +612,16 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
/* Hardware will automatically fix TCP/UDP checksum. */
*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
} else if (set_ip6_src.head.len_lw) {
+ nfp_action += act_size;
act_size = sizeof(set_ip6_src);
memcpy(nfp_action, &set_ip6_src, act_size);
*a_len += act_size;
/* Hardware will automatically fix TCP/UDP checksum. */
*csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto);
- } else if (set_tport.head.len_lw) {
+ }
+ if (set_tport.head.len_lw) {
+ nfp_action += act_size;
act_size = sizeof(set_tport);
memcpy(nfp_action, &set_tport, act_size);
*a_len += act_size;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 30c926c4bc47..8e5bec04d1f9 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -4,6 +4,7 @@
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
#include <net/netevent.h>
+#include <net/vxlan.h>
#include <linux/idr.h>
#include <net/dst_metadata.h>
#include <net/arp.h>
@@ -187,7 +188,7 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
return false;
if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
return true;
- if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
+ if (netif_is_vxlan(netdev))
return true;
return false;
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 8b23d2848457..25382f8fbb70 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -19,34 +19,18 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/crc32.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
#include <linux/clk.h>
-#include <linux/workqueue.h>
-#include <linux/netdevice.h>
+#include <linux/crc32.h>
#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/phy.h>
-#include <linux/dma-mapping.h>
-#include <linux/of.h>
+#include <linux/module.h>
#include <linux/of_net.h>
-#include <linux/types.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
-#include <linux/io.h>
#include <mach/board.h>
-#include <mach/platform.h>
#include <mach/hardware.h>
+#include <mach/platform.h>
#define MODNAME "lpc-eth"
#define DRV_VERSION "1.00"
@@ -1257,18 +1241,19 @@ static const struct net_device_ops lpc_netdev_ops = {
static int lpc_eth_drv_probe(struct platform_device *pdev)
{
- struct resource *res;
- struct net_device *ndev;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
struct netdata_local *pldat;
- struct phy_device *phydev;
+ struct net_device *ndev;
dma_addr_t dma_handle;
+ struct resource *res;
int irq, ret;
u32 tmp;
/* Setup network interface for RMII or MII mode */
tmp = __raw_readl(LPC32XX_CLKPWR_MACCLK_CTRL);
tmp &= ~LPC32XX_CLKPWR_MACCTRL_PINS_MSK;
- if (lpc_phy_interface_mode(&pdev->dev) == PHY_INTERFACE_MODE_MII)
+ if (lpc_phy_interface_mode(dev) == PHY_INTERFACE_MODE_MII)
tmp |= LPC32XX_CLKPWR_MACCTRL_USE_MII_PINS;
else
tmp |= LPC32XX_CLKPWR_MACCTRL_USE_RMII_PINS;
@@ -1278,7 +1263,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq = platform_get_irq(pdev, 0);
if (!res || irq < 0) {
- dev_err(&pdev->dev, "error getting resources.\n");
+ dev_err(dev, "error getting resources.\n");
ret = -ENXIO;
goto err_exit;
}
@@ -1286,12 +1271,12 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
/* Allocate net driver data structure */
ndev = alloc_etherdev(sizeof(struct netdata_local));
if (!ndev) {
- dev_err(&pdev->dev, "could not allocate device.\n");
+ dev_err(dev, "could not allocate device.\n");
ret = -ENOMEM;
goto err_exit;
}
- SET_NETDEV_DEV(ndev, &pdev->dev);
+ SET_NETDEV_DEV(ndev, dev);
pldat = netdev_priv(ndev);
pldat->pdev = pdev;
@@ -1303,9 +1288,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
ndev->irq = irq;
/* Get clock for the device */
- pldat->clk = clk_get(&pdev->dev, NULL);
+ pldat->clk = clk_get(dev, NULL);
if (IS_ERR(pldat->clk)) {
- dev_err(&pdev->dev, "error getting clock.\n");
+ dev_err(dev, "error getting clock.\n");
ret = PTR_ERR(pldat->clk);
goto err_out_free_dev;
}
@@ -1318,14 +1303,14 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
/* Map IO space */
pldat->net_base = ioremap(res->start, resource_size(res));
if (!pldat->net_base) {
- dev_err(&pdev->dev, "failed to map registers\n");
+ dev_err(dev, "failed to map registers\n");
ret = -ENOMEM;
goto err_out_disable_clocks;
}
ret = request_irq(ndev->irq, __lpc_eth_interrupt, 0,
ndev->name, ndev);
if (ret) {
- dev_err(&pdev->dev, "error requesting interrupt.\n");
+ dev_err(dev, "error requesting interrupt.\n");
goto err_out_iounmap;
}
@@ -1339,7 +1324,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
sizeof(struct txrx_desc_t) + sizeof(struct rx_status_t));
pldat->dma_buff_base_v = 0;
- if (use_iram_for_net(&pldat->pdev->dev)) {
+ if (use_iram_for_net(dev)) {
dma_handle = LPC32XX_IRAM_BASE;
if (pldat->dma_buff_size <= lpc32xx_return_iram_size())
pldat->dma_buff_base_v =
@@ -1350,7 +1335,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
}
if (pldat->dma_buff_base_v == 0) {
- ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
if (ret)
goto err_out_free_irq;
@@ -1359,7 +1344,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
/* Allocate a chunk of memory for the DMA ethernet buffers
and descriptors */
pldat->dma_buff_base_v =
- dma_alloc_coherent(&pldat->pdev->dev,
+ dma_alloc_coherent(dev,
pldat->dma_buff_size, &dma_handle,
GFP_KERNEL);
if (pldat->dma_buff_base_v == NULL) {
@@ -1384,7 +1369,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
__lpc_get_mac(pldat, ndev->dev_addr);
if (!is_valid_ether_addr(ndev->dev_addr)) {
- const char *macaddr = of_get_mac_address(pdev->dev.of_node);
+ const char *macaddr = of_get_mac_address(np);
if (macaddr)
memcpy(ndev->dev_addr, macaddr, ETH_ALEN);
}
@@ -1414,7 +1399,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
ret = register_netdev(ndev);
if (ret) {
- dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
+ dev_err(dev, "Cannot register net device, aborting.\n");
goto err_out_dma_unmap;
}
platform_set_drvdata(pdev, ndev);
@@ -1426,19 +1411,17 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
netdev_info(ndev, "LPC mac at 0x%08x irq %d\n",
res->start, ndev->irq);
- phydev = ndev->phydev;
-
- device_init_wakeup(&pdev->dev, 1);
- device_set_wakeup_enable(&pdev->dev, 0);
+ device_init_wakeup(dev, 1);
+ device_set_wakeup_enable(dev, 0);
return 0;
err_out_unregister_netdev:
unregister_netdev(ndev);
err_out_dma_unmap:
- if (!use_iram_for_net(&pldat->pdev->dev) ||
+ if (!use_iram_for_net(dev) ||
pldat->dma_buff_size > lpc32xx_return_iram_size())
- dma_free_coherent(&pldat->pdev->dev, pldat->dma_buff_size,
+ dma_free_coherent(dev, pldat->dma_buff_size,
pldat->dma_buff_base_v,
pldat->dma_buff_base_p);
err_out_free_irq:
@@ -1533,13 +1516,11 @@ static int lpc_eth_drv_resume(struct platform_device *pdev)
}
#endif
-#ifdef CONFIG_OF
static const struct of_device_id lpc_eth_match[] = {
{ .compatible = "nxp,lpc-eth" },
{ }
};
MODULE_DEVICE_TABLE(of, lpc_eth_match);
-#endif
static struct platform_driver lpc_eth_driver = {
.probe = lpc_eth_drv_probe,
@@ -1550,7 +1531,7 @@ static struct platform_driver lpc_eth_driver = {
#endif
.driver = {
.name = MODNAME,
- .of_match_table = of_match_ptr(lpc_eth_match),
+ .of_match_table = lpc_eth_match,
},
};
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 5f0962d353ce..d9a03aba0e02 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -915,7 +915,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
/* Prototypes */
int qed_fill_dev_info(struct qed_dev *cdev,
struct qed_dev_info *dev_info);
-void qed_link_update(struct qed_hwfn *hwfn);
+void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index f2dfc7a976c0..5c221ebaa7b3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12207,11 +12207,56 @@ struct public_port {
u32 transceiver_data;
#define ETH_TRANSCEIVER_STATE_MASK 0x000000FF
#define ETH_TRANSCEIVER_STATE_SHIFT 0x00000000
+#define ETH_TRANSCEIVER_STATE_OFFSET 0x00000000
#define ETH_TRANSCEIVER_STATE_UNPLUGGED 0x00000000
#define ETH_TRANSCEIVER_STATE_PRESENT 0x00000001
#define ETH_TRANSCEIVER_STATE_VALID 0x00000003
#define ETH_TRANSCEIVER_STATE_UPDATING 0x00000008
-
+#define ETH_TRANSCEIVER_TYPE_MASK 0x0000FF00
+#define ETH_TRANSCEIVER_TYPE_OFFSET 0x8
+#define ETH_TRANSCEIVER_TYPE_NONE 0x00
+#define ETH_TRANSCEIVER_TYPE_UNKNOWN 0xFF
+#define ETH_TRANSCEIVER_TYPE_1G_PCC 0x01
+#define ETH_TRANSCEIVER_TYPE_1G_ACC 0x02
+#define ETH_TRANSCEIVER_TYPE_1G_LX 0x03
+#define ETH_TRANSCEIVER_TYPE_1G_SX 0x04
+#define ETH_TRANSCEIVER_TYPE_10G_SR 0x05
+#define ETH_TRANSCEIVER_TYPE_10G_LR 0x06
+#define ETH_TRANSCEIVER_TYPE_10G_LRM 0x07
+#define ETH_TRANSCEIVER_TYPE_10G_ER 0x08
+#define ETH_TRANSCEIVER_TYPE_10G_PCC 0x09
+#define ETH_TRANSCEIVER_TYPE_10G_ACC 0x0a
+#define ETH_TRANSCEIVER_TYPE_XLPPI 0x0b
+#define ETH_TRANSCEIVER_TYPE_40G_LR4 0x0c
+#define ETH_TRANSCEIVER_TYPE_40G_SR4 0x0d
+#define ETH_TRANSCEIVER_TYPE_40G_CR4 0x0e
+#define ETH_TRANSCEIVER_TYPE_100G_AOC 0x0f
+#define ETH_TRANSCEIVER_TYPE_100G_SR4 0x10
+#define ETH_TRANSCEIVER_TYPE_100G_LR4 0x11
+#define ETH_TRANSCEIVER_TYPE_100G_ER4 0x12
+#define ETH_TRANSCEIVER_TYPE_100G_ACC 0x13
+#define ETH_TRANSCEIVER_TYPE_100G_CR4 0x14
+#define ETH_TRANSCEIVER_TYPE_4x10G_SR 0x15
+#define ETH_TRANSCEIVER_TYPE_25G_CA_N 0x16
+#define ETH_TRANSCEIVER_TYPE_25G_ACC_S 0x17
+#define ETH_TRANSCEIVER_TYPE_25G_CA_S 0x18
+#define ETH_TRANSCEIVER_TYPE_25G_ACC_M 0x19
+#define ETH_TRANSCEIVER_TYPE_25G_CA_L 0x1a
+#define ETH_TRANSCEIVER_TYPE_25G_ACC_L 0x1b
+#define ETH_TRANSCEIVER_TYPE_25G_SR 0x1c
+#define ETH_TRANSCEIVER_TYPE_25G_LR 0x1d
+#define ETH_TRANSCEIVER_TYPE_25G_AOC 0x1e
+#define ETH_TRANSCEIVER_TYPE_4x10G 0x1f
+#define ETH_TRANSCEIVER_TYPE_4x25G_CR 0x20
+#define ETH_TRANSCEIVER_TYPE_1000BASET 0x21
+#define ETH_TRANSCEIVER_TYPE_10G_BASET 0x22
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR 0x30
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR 0x31
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR 0x32
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR 0x33
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR 0x34
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR 0x35
+#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC 0x36
u32 wol_info;
u32 wol_pkt_len;
u32 wol_pkt_details;
@@ -13199,6 +13244,13 @@ struct nvm_cfg1_port {
u32 transceiver_00;
u32 device_ids;
u32 board_cfg;
+#define NVM_CFG1_PORT_PORT_TYPE_MASK 0x000000FF
+#define NVM_CFG1_PORT_PORT_TYPE_OFFSET 0
+#define NVM_CFG1_PORT_PORT_TYPE_UNDEFINED 0x0
+#define NVM_CFG1_PORT_PORT_TYPE_MODULE 0x1
+#define NVM_CFG1_PORT_PORT_TYPE_BACKPLANE 0x2
+#define NVM_CFG1_PORT_PORT_TYPE_EXT_PHY 0x3
+#define NVM_CFG1_PORT_PORT_TYPE_MODULE_SLAVE 0x4
u32 mnm_10g_cap;
u32 mnm_10g_ctrl;
u32 mnm_10g_misc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index af3a28ec04eb..0f0aba793352 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -228,7 +228,7 @@ static int qed_grc_attn_cb(struct qed_hwfn *p_hwfn)
attn_master_to_str(GET_FIELD(tmp, QED_GRC_ATTENTION_MASTER)),
GET_FIELD(tmp2, QED_GRC_ATTENTION_PF),
(GET_FIELD(tmp2, QED_GRC_ATTENTION_PRIV) ==
- QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Ireelevant)",
+ QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Irrelevant)",
GET_FIELD(tmp2, QED_GRC_ATTENTION_VF));
out:
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 75d217aaf8ce..35fd0db6a677 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -58,6 +58,7 @@
#include "qed_iscsi.h"
#include "qed_mcp.h"
+#include "qed_reg_addr.h"
#include "qed_hw.h"
#include "qed_selftest.h"
#include "qed_debug.h"
@@ -1304,6 +1305,7 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
struct qed_hwfn *hwfn;
struct qed_mcp_link_params *link_params;
struct qed_ptt *ptt;
+ u32 sup_caps;
int rc;
if (!cdev)
@@ -1330,26 +1332,50 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
link_params->speed.autoneg = params->autoneg;
if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) {
link_params->speed.advertised_speeds = 0;
- if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) ||
- (params->adv_speeds & QED_LM_1000baseT_Full_BIT))
+ sup_caps = QED_LM_1000baseT_Full_BIT |
+ QED_LM_1000baseKX_Full_BIT |
+ QED_LM_1000baseX_Full_BIT;
+ if (params->adv_speeds & sup_caps)
link_params->speed.advertised_speeds |=
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
- if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
+ sup_caps = QED_LM_10000baseT_Full_BIT |
+ QED_LM_10000baseKR_Full_BIT |
+ QED_LM_10000baseKX4_Full_BIT |
+ QED_LM_10000baseR_FEC_BIT |
+ QED_LM_10000baseCR_Full_BIT |
+ QED_LM_10000baseSR_Full_BIT |
+ QED_LM_10000baseLR_Full_BIT |
+ QED_LM_10000baseLRM_Full_BIT;
+ if (params->adv_speeds & sup_caps)
link_params->speed.advertised_speeds |=
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
if (params->adv_speeds & QED_LM_20000baseKR2_Full_BIT)
link_params->speed.advertised_speeds |=
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G;
- if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT)
+ sup_caps = QED_LM_25000baseKR_Full_BIT |
+ QED_LM_25000baseCR_Full_BIT |
+ QED_LM_25000baseSR_Full_BIT;
+ if (params->adv_speeds & sup_caps)
link_params->speed.advertised_speeds |=
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
- if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT)
+ sup_caps = QED_LM_40000baseLR4_Full_BIT |
+ QED_LM_40000baseKR4_Full_BIT |
+ QED_LM_40000baseCR4_Full_BIT |
+ QED_LM_40000baseSR4_Full_BIT;
+ if (params->adv_speeds & sup_caps)
link_params->speed.advertised_speeds |=
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
- if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT)
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+ sup_caps = QED_LM_50000baseKR2_Full_BIT |
+ QED_LM_50000baseCR2_Full_BIT |
+ QED_LM_50000baseSR2_Full_BIT;
+ if (params->adv_speeds & sup_caps)
link_params->speed.advertised_speeds |=
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
- if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT)
+ sup_caps = QED_LM_100000baseKR4_Full_BIT |
+ QED_LM_100000baseSR4_Full_BIT |
+ QED_LM_100000baseCR4_Full_BIT |
+ QED_LM_100000baseLR4_ER4_Full_BIT;
+ if (params->adv_speeds & sup_caps)
link_params->speed.advertised_speeds |=
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G;
}
@@ -1462,12 +1488,149 @@ static int qed_get_link_data(struct qed_hwfn *hwfn,
return 0;
}
+static void qed_fill_link_capability(struct qed_hwfn *hwfn,
+ struct qed_ptt *ptt, u32 capability,
+ u32 *if_capability)
+{
+ u32 media_type, tcvr_state, tcvr_type;
+ u32 speed_mask, board_cfg;
+
+ if (qed_mcp_get_media_type(hwfn, ptt, &media_type))
+ media_type = MEDIA_UNSPECIFIED;
+
+ if (qed_mcp_get_transceiver_data(hwfn, ptt, &tcvr_state, &tcvr_type))
+ tcvr_type = ETH_TRANSCEIVER_STATE_UNPLUGGED;
+
+ if (qed_mcp_trans_speed_mask(hwfn, ptt, &speed_mask))
+ speed_mask = 0xFFFFFFFF;
+
+ if (qed_mcp_get_board_config(hwfn, ptt, &board_cfg))
+ board_cfg = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED;
+
+ DP_VERBOSE(hwfn->cdev, NETIF_MSG_DRV,
+ "Media_type = 0x%x tcvr_state = 0x%x tcvr_type = 0x%x speed_mask = 0x%x board_cfg = 0x%x\n",
+ media_type, tcvr_state, tcvr_type, speed_mask, board_cfg);
+
+ switch (media_type) {
+ case MEDIA_DA_TWINAX:
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+ *if_capability |= QED_LM_20000baseKR2_Full_BIT;
+ /* For DAC media multiple speed capabilities are supported*/
+ capability = capability & speed_mask;
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
+ *if_capability |= QED_LM_1000baseKX_Full_BIT;
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
+ *if_capability |= QED_LM_10000baseCR_Full_BIT;
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+ *if_capability |= QED_LM_40000baseCR4_Full_BIT;
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+ *if_capability |= QED_LM_25000baseCR_Full_BIT;
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+ *if_capability |= QED_LM_50000baseCR2_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
+ *if_capability |= QED_LM_100000baseCR4_Full_BIT;
+ break;
+ case MEDIA_BASE_T:
+ if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_EXT_PHY) {
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
+ *if_capability |= QED_LM_1000baseT_Full_BIT;
+ }
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) {
+ *if_capability |= QED_LM_10000baseT_Full_BIT;
+ }
+ }
+ if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_MODULE) {
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_1000BASET)
+ *if_capability |= QED_LM_1000baseT_Full_BIT;
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_BASET)
+ *if_capability |= QED_LM_10000baseT_Full_BIT;
+ }
+ break;
+ case MEDIA_SFP_1G_FIBER:
+ case MEDIA_SFPP_10G_FIBER:
+ case MEDIA_XFP_FIBER:
+ case MEDIA_MODULE_FIBER:
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
+ if ((tcvr_type == ETH_TRANSCEIVER_TYPE_1G_LX) ||
+ (tcvr_type == ETH_TRANSCEIVER_TYPE_1G_SX))
+ *if_capability |= QED_LM_1000baseKX_Full_BIT;
+ }
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) {
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_SR)
+ *if_capability |= QED_LM_10000baseSR_Full_BIT;
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LR)
+ *if_capability |= QED_LM_10000baseLR_Full_BIT;
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LRM)
+ *if_capability |= QED_LM_10000baseLRM_Full_BIT;
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_ER)
+ *if_capability |= QED_LM_10000baseR_FEC_BIT;
+ }
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+ *if_capability |= QED_LM_20000baseKR2_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) {
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_25G_SR)
+ *if_capability |= QED_LM_25000baseSR_Full_BIT;
+ }
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) {
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_LR4)
+ *if_capability |= QED_LM_40000baseLR4_Full_BIT;
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_SR4)
+ *if_capability |= QED_LM_40000baseSR4_Full_BIT;
+ }
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+ *if_capability |= QED_LM_50000baseKR2_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) {
+ if (tcvr_type == ETH_TRANSCEIVER_TYPE_100G_SR4)
+ *if_capability |= QED_LM_100000baseSR4_Full_BIT;
+ }
+
+ break;
+ case MEDIA_KR:
+ if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+ *if_capability |= QED_LM_20000baseKR2_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
+ *if_capability |= QED_LM_1000baseKX_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
+ *if_capability |= QED_LM_10000baseKR_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+ *if_capability |= QED_LM_25000baseKR_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+ *if_capability |= QED_LM_40000baseKR4_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+ *if_capability |= QED_LM_50000baseKR2_Full_BIT;
+ if (capability &
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
+ *if_capability |= QED_LM_100000baseKR4_Full_BIT;
+ break;
+ case MEDIA_UNSPECIFIED:
+ case MEDIA_NOT_PRESENT:
+ DP_VERBOSE(hwfn->cdev, QED_MSG_DEBUG,
+ "Unknown media and transceiver type;\n");
+ break;
+ }
+}
+
static void qed_fill_link(struct qed_hwfn *hwfn,
+ struct qed_ptt *ptt,
struct qed_link_output *if_link)
{
+ struct qed_mcp_link_capabilities link_caps;
struct qed_mcp_link_params params;
struct qed_mcp_link_state link;
- struct qed_mcp_link_capabilities link_caps;
u32 media_type;
memset(if_link, 0, sizeof(*if_link));
@@ -1498,58 +1661,20 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
if_link->advertised_caps |= QED_LM_Autoneg_BIT;
else
if_link->advertised_caps &= ~QED_LM_Autoneg_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
- if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
- QED_LM_1000baseT_Full_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
- if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
- if_link->advertised_caps |= QED_LM_20000baseKR2_Full_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
- if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
- if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
- if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT;
- if (params.speed.advertised_speeds &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
- if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT;
-
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
- if_link->supported_caps |= QED_LM_1000baseT_Half_BIT |
- QED_LM_1000baseT_Full_BIT;
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
- if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
- if_link->supported_caps |= QED_LM_20000baseKR2_Full_BIT;
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
- if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
- if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT;
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
- if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT;
- if (link_caps.speed_capabilities &
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
- if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT;
+
+ /* Fill link advertised capability*/
+ qed_fill_link_capability(hwfn, ptt, params.speed.advertised_speeds,
+ &if_link->advertised_caps);
+ /* Fill link supported capability*/
+ qed_fill_link_capability(hwfn, ptt, link_caps.speed_capabilities,
+ &if_link->supported_caps);
if (link.link_up)
if_link->speed = link.speed;
/* TODO - fill duplex properly */
if_link->duplex = DUPLEX_FULL;
- qed_mcp_get_media_type(hwfn->cdev, &media_type);
+ qed_mcp_get_media_type(hwfn, ptt, &media_type);
if_link->port = qed_get_port_type(media_type);
if_link->autoneg = params.speed.autoneg;
@@ -1562,9 +1687,8 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
/* Link partner capabilities */
- if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD)
- if_link->lp_caps |= QED_LM_1000baseT_Half_BIT;
- if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD)
+ if (link.partner_adv_speed &
+ QED_LINK_PARTNER_SPEED_1G_FD)
if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
@@ -1607,21 +1731,34 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
static void qed_get_current_link(struct qed_dev *cdev,
struct qed_link_output *if_link)
{
+ struct qed_hwfn *hwfn;
+ struct qed_ptt *ptt;
int i;
- qed_fill_link(&cdev->hwfns[0], if_link);
+ hwfn = &cdev->hwfns[0];
+ if (IS_PF(cdev)) {
+ ptt = qed_ptt_acquire(hwfn);
+ if (ptt) {
+ qed_fill_link(hwfn, ptt, if_link);
+ qed_ptt_release(hwfn, ptt);
+ } else {
+ DP_NOTICE(hwfn, "Failed to fill link; No PTT\n");
+ }
+ } else {
+ qed_fill_link(hwfn, NULL, if_link);
+ }
for_each_hwfn(cdev, i)
qed_inform_vf_link_state(&cdev->hwfns[i]);
}
-void qed_link_update(struct qed_hwfn *hwfn)
+void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt)
{
void *cookie = hwfn->cdev->ops_cookie;
struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common;
struct qed_link_output if_link;
- qed_fill_link(hwfn, &if_link);
+ qed_fill_link(hwfn, ptt, &if_link);
qed_inform_vf_link_state(hwfn);
if (IS_LEAD_HWFN(hwfn) && cookie)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index b06e4cb72c6c..386ee5410237 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1447,7 +1447,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE)
qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link);
- qed_link_update(p_hwfn);
+ qed_link_update(p_hwfn, p_ptt);
out:
spin_unlock_bh(&p_hwfn->mcp_info->link_lock);
}
@@ -1867,12 +1867,12 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
return 0;
}
-int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
+int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_media_type)
{
- struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
- struct qed_ptt *p_ptt;
+ *p_media_type = MEDIA_UNSPECIFIED;
- if (IS_VF(cdev))
+ if (IS_VF(p_hwfn->cdev))
return -EINVAL;
if (!qed_mcp_is_init(p_hwfn)) {
@@ -1880,16 +1880,195 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
return -EBUSY;
}
- *p_media_type = MEDIA_UNSPECIFIED;
+ if (!p_ptt) {
+ *p_media_type = MEDIA_UNSPECIFIED;
+ return -EINVAL;
+ }
- p_ptt = qed_ptt_acquire(p_hwfn);
- if (!p_ptt)
+ *p_media_type = qed_rd(p_hwfn, p_ptt,
+ p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port,
+ media_type));
+
+ return 0;
+}
+
+int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *p_transceiver_state,
+ u32 *p_transceiver_type)
+{
+ u32 transceiver_info;
+
+ if (IS_VF(p_hwfn->cdev))
+ return -EINVAL;
+
+ if (!qed_mcp_is_init(p_hwfn)) {
+ DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
return -EBUSY;
+ }
- *p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
- offsetof(struct public_port, media_type));
+ *p_transceiver_type = ETH_TRANSCEIVER_TYPE_NONE;
+ *p_transceiver_state = ETH_TRANSCEIVER_STATE_UPDATING;
- qed_ptt_release(p_hwfn, p_ptt);
+ transceiver_info = qed_rd(p_hwfn, p_ptt,
+ p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port,
+ transceiver_data));
+
+ *p_transceiver_state = (transceiver_info &
+ ETH_TRANSCEIVER_STATE_MASK) >>
+ ETH_TRANSCEIVER_STATE_OFFSET;
+
+ if (*p_transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT)
+ *p_transceiver_type = (transceiver_info &
+ ETH_TRANSCEIVER_TYPE_MASK) >>
+ ETH_TRANSCEIVER_TYPE_OFFSET;
+ else
+ *p_transceiver_type = ETH_TRANSCEIVER_TYPE_UNKNOWN;
+
+ return 0;
+}
+static bool qed_is_transceiver_ready(u32 transceiver_state,
+ u32 transceiver_type)
+{
+ if ((transceiver_state & ETH_TRANSCEIVER_STATE_PRESENT) &&
+ ((transceiver_state & ETH_TRANSCEIVER_STATE_UPDATING) == 0x0) &&
+ (transceiver_type != ETH_TRANSCEIVER_TYPE_NONE))
+ return true;
+
+ return false;
+}
+
+int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_speed_mask)
+{
+ u32 transceiver_type, transceiver_state;
+
+ qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
+ &transceiver_type);
+
+ if (qed_is_transceiver_ready(transceiver_state, transceiver_type) ==
+ false)
+ return -EINVAL;
+
+ switch (transceiver_type) {
+ case ETH_TRANSCEIVER_TYPE_1G_LX:
+ case ETH_TRANSCEIVER_TYPE_1G_SX:
+ case ETH_TRANSCEIVER_TYPE_1G_PCC:
+ case ETH_TRANSCEIVER_TYPE_1G_ACC:
+ case ETH_TRANSCEIVER_TYPE_1000BASET:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_10G_SR:
+ case ETH_TRANSCEIVER_TYPE_10G_LR:
+ case ETH_TRANSCEIVER_TYPE_10G_LRM:
+ case ETH_TRANSCEIVER_TYPE_10G_ER:
+ case ETH_TRANSCEIVER_TYPE_10G_PCC:
+ case ETH_TRANSCEIVER_TYPE_10G_ACC:
+ case ETH_TRANSCEIVER_TYPE_4x10G:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_40G_LR4:
+ case ETH_TRANSCEIVER_TYPE_40G_SR4:
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR:
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_100G_AOC:
+ case ETH_TRANSCEIVER_TYPE_100G_SR4:
+ case ETH_TRANSCEIVER_TYPE_100G_LR4:
+ case ETH_TRANSCEIVER_TYPE_100G_ER4:
+ case ETH_TRANSCEIVER_TYPE_100G_ACC:
+ *p_speed_mask =
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_25G_SR:
+ case ETH_TRANSCEIVER_TYPE_25G_LR:
+ case ETH_TRANSCEIVER_TYPE_25G_AOC:
+ case ETH_TRANSCEIVER_TYPE_25G_ACC_S:
+ case ETH_TRANSCEIVER_TYPE_25G_ACC_M:
+ case ETH_TRANSCEIVER_TYPE_25G_ACC_L:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_25G_CA_N:
+ case ETH_TRANSCEIVER_TYPE_25G_CA_S:
+ case ETH_TRANSCEIVER_TYPE_25G_CA_L:
+ case ETH_TRANSCEIVER_TYPE_4x25G_CR:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_40G_CR4:
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_100G_CR4:
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR:
+ *p_speed_mask =
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR:
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR:
+ case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC:
+ *p_speed_mask =
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_XLPPI:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+ break;
+ case ETH_TRANSCEIVER_TYPE_10G_BASET:
+ *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+ NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+ break;
+ default:
+ DP_INFO(p_hwfn, "Unknown transceiver type 0x%x\n",
+ transceiver_type);
+ *p_speed_mask = 0xff;
+ break;
+ }
+
+ return 0;
+}
+
+int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_board_config)
+{
+ u32 nvm_cfg_addr, nvm_cfg1_offset, port_cfg_addr;
+
+ if (IS_VF(p_hwfn->cdev))
+ return -EINVAL;
+
+ if (!qed_mcp_is_init(p_hwfn)) {
+ DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+ return -EBUSY;
+ }
+ if (!p_ptt) {
+ *p_board_config = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED;
+ return -EINVAL;
+ }
+
+ nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0);
+ nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4);
+ port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+ offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]);
+ *p_board_config = qed_rd(p_hwfn, p_ptt,
+ port_cfg_addr +
+ offsetof(struct nvm_cfg1_port,
+ board_cfg));
return 0;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 85e6b3989e7a..1adfe52b3905 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -322,14 +322,61 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
* @brief Get media type value of the port.
*
* @param cdev - qed dev pointer
+ * @param p_ptt
* @param mfw_ver - media type value
*
* @return int -
* 0 - Operation was successul.
* -EBUSY - Operation failed
*/
-int qed_mcp_get_media_type(struct qed_dev *cdev,
- u32 *media_type);
+int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *media_type);
+
+/**
+ * @brief Get transceiver data of the port.
+ *
+ * @param cdev - qed dev pointer
+ * @param p_ptt
+ * @param p_transceiver_state - transceiver state.
+ * @param p_transceiver_type - media type value
+ *
+ * @return int -
+ * 0 - Operation was successful.
+ * -EBUSY - Operation failed
+ */
+int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 *p_transceiver_state,
+ u32 *p_tranceiver_type);
+
+/**
+ * @brief Get transceiver supported speed mask.
+ *
+ * @param cdev - qed dev pointer
+ * @param p_ptt
+ * @param p_speed_mask - Bit mask of all supported speeds.
+ *
+ * @return int -
+ * 0 - Operation was successful.
+ * -EBUSY - Operation failed
+ */
+
+int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_speed_mask);
+
+/**
+ * @brief Get board configuration.
+ *
+ * @param cdev - qed dev pointer
+ * @param p_ptt
+ * @param p_board_config - Board config.
+ *
+ * @return int -
+ * 0 - Operation was successful.
+ * -EBUSY - Operation failed
+ */
+int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 *p_board_config);
/**
* @brief General function for sending commands to the MCP
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index be118d057b92..b6cccf44bf40 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1688,7 +1688,7 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn)
ops->ports_update(cookie, vxlan_port, geneve_port);
/* Always update link configuration according to bulletin */
- qed_link_update(hwfn);
+ qed_link_update(hwfn, NULL);
}
void qed_iov_vf_task(struct work_struct *work)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 7ff50b4488f6..8cbbd628fd73 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -413,19 +413,42 @@ struct qede_link_mode_mapping {
};
static const struct qede_link_mode_mapping qed_lm_map[] = {
- {QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
{QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
{QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
{QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
- {QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT},
{QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+ {QED_LM_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT},
+ {QED_LM_2500baseX_Full_BIT, ETHTOOL_LINK_MODE_2500baseX_Full_BIT},
+ {QED_LM_Backplane_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
+ {QED_LM_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT},
+ {QED_LM_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT},
{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+ {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+ {QED_LM_10000baseR_FEC_BIT, ETHTOOL_LINK_MODE_10000baseR_FEC_BIT},
{QED_LM_20000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT},
- {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+ {QED_LM_40000baseKR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT},
+ {QED_LM_40000baseCR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT},
+ {QED_LM_40000baseSR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT},
{QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
+ {QED_LM_25000baseCR_Full_BIT, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT},
+ {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+ {QED_LM_25000baseSR_Full_BIT, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT},
+ {QED_LM_50000baseCR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT},
{QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT},
{QED_LM_100000baseKR4_Full_BIT,
- ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+ {QED_LM_100000baseSR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT},
+ {QED_LM_100000baseCR4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT},
+ {QED_LM_100000baseLR4_ER4_Full_BIT,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT},
+ {QED_LM_50000baseSR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT},
+ {QED_LM_1000baseX_Full_BIT, ETHTOOL_LINK_MODE_1000baseX_Full_BIT},
+ {QED_LM_10000baseCR_Full_BIT, ETHTOOL_LINK_MODE_10000baseCR_Full_BIT},
+ {QED_LM_10000baseSR_Full_BIT, ETHTOOL_LINK_MODE_10000baseSR_Full_BIT},
+ {QED_LM_10000baseLR_Full_BIT, ETHTOOL_LINK_MODE_10000baseLR_Full_BIT},
+ {QED_LM_10000baseLRM_Full_BIT, ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT},
};
#define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name) \
@@ -495,6 +518,7 @@ static int qede_set_link_ksettings(struct net_device *dev,
struct qede_dev *edev = netdev_priv(dev);
struct qed_link_output current_link;
struct qed_link_params params;
+ u32 sup_caps;
if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
DP_INFO(edev, "Link settings are not allowed to be changed\n");
@@ -521,60 +545,85 @@ static int qede_set_link_ksettings(struct net_device *dev,
params.forced_speed = base->speed;
switch (base->speed) {
case SPEED_1000:
- if (!(current_link.supported_caps &
- QED_LM_1000baseT_Full_BIT)) {
+ sup_caps = QED_LM_1000baseT_Full_BIT |
+ QED_LM_1000baseKX_Full_BIT |
+ QED_LM_1000baseX_Full_BIT;
+ if (!(current_link.supported_caps & sup_caps)) {
DP_INFO(edev, "1G speed not supported\n");
return -EINVAL;
}
- params.adv_speeds = QED_LM_1000baseT_Full_BIT;
+ params.adv_speeds = current_link.supported_caps &
+ sup_caps;
break;
case SPEED_10000:
- if (!(current_link.supported_caps &
- QED_LM_10000baseKR_Full_BIT)) {
+ sup_caps = QED_LM_10000baseT_Full_BIT |
+ QED_LM_10000baseKR_Full_BIT |
+ QED_LM_10000baseKX4_Full_BIT |
+ QED_LM_10000baseR_FEC_BIT |
+ QED_LM_10000baseCR_Full_BIT |
+ QED_LM_10000baseSR_Full_BIT |
+ QED_LM_10000baseLR_Full_BIT |
+ QED_LM_10000baseLRM_Full_BIT;
+ if (!(current_link.supported_caps & sup_caps)) {
DP_INFO(edev, "10G speed not supported\n");
return -EINVAL;
}
- params.adv_speeds = QED_LM_10000baseKR_Full_BIT;
+ params.adv_speeds = current_link.supported_caps &
+ sup_caps;
break;
case SPEED_20000:
if (!(current_link.supported_caps &
- QED_LM_20000baseKR2_Full_BIT)) {
+ QED_LM_20000baseKR2_Full_BIT)) {
DP_INFO(edev, "20G speed not supported\n");
return -EINVAL;
}
params.adv_speeds = QED_LM_20000baseKR2_Full_BIT;
break;
case SPEED_25000:
- if (!(current_link.supported_caps &
- QED_LM_25000baseKR_Full_BIT)) {
+ sup_caps = QED_LM_25000baseKR_Full_BIT |
+ QED_LM_25000baseCR_Full_BIT |
+ QED_LM_25000baseSR_Full_BIT;
+ if (!(current_link.supported_caps & sup_caps)) {
DP_INFO(edev, "25G speed not supported\n");
return -EINVAL;
}
- params.adv_speeds = QED_LM_25000baseKR_Full_BIT;
+ params.adv_speeds = current_link.supported_caps &
+ sup_caps;
break;
case SPEED_40000:
- if (!(current_link.supported_caps &
- QED_LM_40000baseLR4_Full_BIT)) {
+ sup_caps = QED_LM_40000baseLR4_Full_BIT |
+ QED_LM_40000baseKR4_Full_BIT |
+ QED_LM_40000baseCR4_Full_BIT |
+ QED_LM_40000baseSR4_Full_BIT;
+ if (!(current_link.supported_caps & sup_caps)) {
DP_INFO(edev, "40G speed not supported\n");
return -EINVAL;
}
- params.adv_speeds = QED_LM_40000baseLR4_Full_BIT;
+ params.adv_speeds = current_link.supported_caps &
+ sup_caps;
break;
case SPEED_50000:
- if (!(current_link.supported_caps &
- QED_LM_50000baseKR2_Full_BIT)) {
+ sup_caps = QED_LM_50000baseKR2_Full_BIT |
+ QED_LM_50000baseCR2_Full_BIT |
+ QED_LM_50000baseSR2_Full_BIT;
+ if (!(current_link.supported_caps & sup_caps)) {
DP_INFO(edev, "50G speed not supported\n");
return -EINVAL;
}
- params.adv_speeds = QED_LM_50000baseKR2_Full_BIT;
+ params.adv_speeds = current_link.supported_caps &
+ sup_caps;
break;
case SPEED_100000:
- if (!(current_link.supported_caps &
- QED_LM_100000baseKR4_Full_BIT)) {
+ sup_caps = QED_LM_100000baseKR4_Full_BIT |
+ QED_LM_100000baseSR4_Full_BIT |
+ QED_LM_100000baseCR4_Full_BIT |
+ QED_LM_100000baseLR4_ER4_Full_BIT;
+ if (!(current_link.supported_caps & sup_caps)) {
DP_INFO(edev, "100G speed not supported\n");
return -EINVAL;
}
- params.adv_speeds = QED_LM_100000baseKR4_Full_BIT;
+ params.adv_speeds = current_link.supported_caps &
+ sup_caps;
break;
default:
DP_INFO(edev, "Unsupported speed %u\n", base->speed);
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index b48f76182049..10b075bc5959 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev)
qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1;
ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data);
- ql_write_nvram_reg(qdev, spir,
- ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data));
}
/*
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 8c4f49adcf9e..006b0aa8cec3 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -631,7 +631,6 @@ struct rtl8169_tc_offsets {
enum rtl_flag {
RTL_FLAG_TASK_ENABLED = 0,
- RTL_FLAG_TASK_SLOW_PENDING,
RTL_FLAG_TASK_RESET_PENDING,
RTL_FLAG_MAX
};
@@ -5852,6 +5851,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
{
rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC);
tp->cur_tx = tp->dirty_tx = 0;
+ netdev_reset_queue(tp->dev);
}
static void rtl_reset_work(struct rtl8169_private *tp)
@@ -6154,6 +6154,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
txd->opts2 = cpu_to_le32(opts[1]);
+ netdev_sent_queue(dev, skb->len);
+
skb_tx_timestamp(skb);
/* Force memory writes to complete before releasing descriptor */
@@ -6252,7 +6254,7 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
{
- unsigned int dirty_tx, tx_left;
+ unsigned int dirty_tx, tx_left, bytes_compl = 0, pkts_compl = 0;
dirty_tx = tp->dirty_tx;
smp_rmb();
@@ -6276,10 +6278,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
tp->TxDescArray + entry);
if (status & LastFrag) {
- u64_stats_update_begin(&tp->tx_stats.syncp);
- tp->tx_stats.packets++;
- tp->tx_stats.bytes += tx_skb->skb->len;
- u64_stats_update_end(&tp->tx_stats.syncp);
+ pkts_compl++;
+ bytes_compl += tx_skb->skb->len;
dev_consume_skb_any(tx_skb->skb);
tx_skb->skb = NULL;
}
@@ -6288,6 +6288,13 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
}
if (tp->dirty_tx != dirty_tx) {
+ netdev_completed_queue(dev, pkts_compl, bytes_compl);
+
+ u64_stats_update_begin(&tp->tx_stats.syncp);
+ tp->tx_stats.packets += pkts_compl;
+ tp->tx_stats.bytes += bytes_compl;
+ u64_stats_update_end(&tp->tx_stats.syncp);
+
tp->dirty_tx = dirty_tx;
/* Sync with rtl8169_start_xmit:
* - publish dirty_tx ring index (write barrier)
@@ -6452,42 +6459,29 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
if (status == 0xffff || !(status & (RTL_EVENT_NAPI | tp->event_slow)))
return IRQ_NONE;
- rtl_irq_disable(tp);
- napi_schedule_irqoff(&tp->napi);
-
- return IRQ_HANDLED;
-}
-
-/*
- * Workqueue context.
- */
-static void rtl_slow_event_work(struct rtl8169_private *tp)
-{
- struct net_device *dev = tp->dev;
- u16 status;
+ if (unlikely(status & SYSErr)) {
+ rtl8169_pcierr_interrupt(tp->dev);
+ goto out;
+ }
- status = rtl_get_events(tp) & tp->event_slow;
- rtl_ack_events(tp, status);
+ if (status & LinkChg)
+ phy_mac_interrupt(tp->dev->phydev);
- if (unlikely(status & RxFIFOOver)) {
- switch (tp->mac_version) {
- /* Work around for rx fifo overflow */
- case RTL_GIGA_MAC_VER_11:
- netif_stop_queue(dev);
- /* XXX - Hack alert. See rtl_task(). */
- set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
- default:
- break;
- }
+ if (unlikely(status & RxFIFOOver &&
+ tp->mac_version == RTL_GIGA_MAC_VER_11)) {
+ netif_stop_queue(tp->dev);
+ /* XXX - Hack alert. See rtl_task(). */
+ set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags);
}
- if (unlikely(status & SYSErr))
- rtl8169_pcierr_interrupt(dev);
-
- if (status & LinkChg)
- phy_mac_interrupt(dev->phydev);
+ if (status & RTL_EVENT_NAPI) {
+ rtl_irq_disable(tp);
+ napi_schedule_irqoff(&tp->napi);
+ }
+out:
+ rtl_ack_events(tp, status);
- rtl_irq_enable_all(tp);
+ return IRQ_HANDLED;
}
static void rtl_task(struct work_struct *work)
@@ -6496,8 +6490,6 @@ static void rtl_task(struct work_struct *work)
int bitnr;
void (*action)(struct rtl8169_private *);
} rtl_work[] = {
- /* XXX - keep rtl_slow_event_work() as first element. */
- { RTL_FLAG_TASK_SLOW_PENDING, rtl_slow_event_work },
{ RTL_FLAG_TASK_RESET_PENDING, rtl_reset_work },
};
struct rtl8169_private *tp =
@@ -6527,29 +6519,16 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
{
struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
struct net_device *dev = tp->dev;
- u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow;
- int work_done= 0;
- u16 status;
-
- status = rtl_get_events(tp);
- rtl_ack_events(tp, status & ~tp->event_slow);
+ int work_done;
- if (status & RTL_EVENT_NAPI_RX)
- work_done = rtl_rx(dev, tp, (u32) budget);
+ work_done = rtl_rx(dev, tp, (u32) budget);
- if (status & RTL_EVENT_NAPI_TX)
- rtl_tx(dev, tp);
-
- if (status & tp->event_slow) {
- enable_mask &= ~tp->event_slow;
-
- rtl_schedule_task(tp, RTL_FLAG_TASK_SLOW_PENDING);
- }
+ rtl_tx(dev, tp);
if (work_done < budget) {
napi_complete_done(napi, work_done);
- rtl_irq_enable(tp, enable_mask);
+ rtl_irq_enable_all(tp);
mmiowb();
}
@@ -7071,20 +7050,12 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
{
unsigned int flags;
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
RTL_W8(tp, Cfg9346, Cfg9346_Lock);
flags = PCI_IRQ_LEGACY;
- break;
- case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_40:
- /* This version was reported to have issues with resume
- * from suspend when using MSI-X
- */
- flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI;
- break;
- default:
+ } else {
flags = PCI_IRQ_ALL_TYPES;
}
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index aeafdb9ac015..beb06628f22d 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -371,7 +371,7 @@ static void rocker_desc_cookie_ptr_set(const struct rocker_desc_info *desc_info,
static struct rocker_desc_info *
rocker_desc_head_get(const struct rocker_dma_ring_info *info)
{
- static struct rocker_desc_info *desc_info;
+ struct rocker_desc_info *desc_info;
u32 head = __pos_inc(info->head, info->size);
desc_info = &info->desc_info[info->head];
@@ -402,7 +402,7 @@ static void rocker_desc_head_set(const struct rocker *rocker,
static struct rocker_desc_info *
rocker_desc_tail_get(struct rocker_dma_ring_info *info)
{
- static struct rocker_desc_info *desc_info;
+ struct rocker_desc_info *desc_info;
if (info->tail == info->head)
return NULL; /* nothing to be done between head and tail */
@@ -2728,6 +2728,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port,
info.addr = recv_info->addr;
info.vid = recv_info->vid;
+ info.offloaded = true;
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
rocker_port->dev, &info.info);
}
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 8d6cff8bd162..4823b6a51134 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2447,8 +2447,7 @@ static int smc_drv_remove(struct platform_device *pdev)
static int smc_drv_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
if (ndev) {
if (netif_running(ndev)) {
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 226be2a56c1f..6cf4d4cb152e 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -3669,8 +3669,7 @@ static int cpsw_remove(struct platform_device *pdev)
#ifdef CONFIG_PM_SLEEP
static int cpsw_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
if (cpsw->data.dual_emac) {
@@ -3693,8 +3692,7 @@ static int cpsw_suspend(struct device *dev)
static int cpsw_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
/* Select default pin state */
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index f270beebb428..9153db120352 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -2002,8 +2002,7 @@ static int davinci_emac_remove(struct platform_device *pdev)
static int davinci_emac_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
if (netif_running(ndev))
emac_dev_stop(ndev);
@@ -2013,8 +2012,7 @@ static int davinci_emac_suspend(struct device *dev)
static int davinci_emac_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
if (netif_running(ndev))
emac_dev_open(ndev);
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index 80fdbff67d82..f9da5d6172e3 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -661,8 +661,7 @@ static int w5300_remove(struct platform_device *pdev)
#ifdef CONFIG_PM_SLEEP
static int w5300_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
struct w5300_priv *priv = netdev_priv(ndev);
if (netif_running(ndev)) {
@@ -676,8 +675,7 @@ static int w5300_suspend(struct device *dev)
static int w5300_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct net_device *ndev = platform_get_drvdata(pdev);
+ struct net_device *ndev = dev_get_drvdata(dev);
struct w5300_priv *priv = netdev_priv(ndev);
if (!netif_running(ndev)) {
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 82eccc930c5c..a0cd1c41cf5f 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -831,12 +831,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(rt))
return PTR_ERR(rt);
- if (skb_dst(skb)) {
- int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
- info->options_len;
-
- skb_dst_update_pmtu(skb, mtu);
- }
+ skb_tunnel_check_pmtu(skb, &rt->dst,
+ GENEVE_IPV4_HLEN + info->options_len);
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->collect_md) {
@@ -881,11 +877,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(dst))
return PTR_ERR(dst);
- if (skb_dst(skb)) {
- int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
-
- skb_dst_update_pmtu(skb, mtu);
- }
+ skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->collect_md) {
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index a7207fa7e451..2df7f60fe052 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -69,6 +69,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
int len;
skb_tx_timestamp(skb);
+
+ /* do not fool net_timestamp_check() with various clock bases */
+ skb->tstamp = 0;
+
skb_orphan(skb);
/* Before queueing this packet to netif_rx(),
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cfda146f3b3b..fc8d5f1ee1ad 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1077,7 +1077,7 @@ static void macvlan_dev_netpoll_cleanup(struct net_device *dev)
vlan->netpoll = NULL;
- __netpoll_free_async(netpoll);
+ __netpoll_free(netpoll);
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index c017486e9b86..696bdf1e4576 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -289,8 +289,7 @@ static int mdio_mux_iproc_remove(struct platform_device *pdev)
#ifdef CONFIG_PM_SLEEP
static int mdio_mux_iproc_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
+ struct iproc_mdiomux_desc *md = dev_get_drvdata(dev);
clk_disable_unprepare(md->core_clk);
@@ -299,8 +298,7 @@ static int mdio_mux_iproc_suspend(struct device *dev)
static int mdio_mux_iproc_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev);
+ struct iproc_mdiomux_desc *md = dev_get_drvdata(dev);
clk_prepare_enable(md->core_clk);
mdio_mux_iproc_config(md);
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 3db06b40580d..9265dea79412 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -14,7 +14,7 @@
* option) any later version.
*
* Support : Micrel Phys:
- * Giga phys: ksz9021, ksz9031
+ * Giga phys: ksz9021, ksz9031, ksz9131
* 100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041
* ksz8021, ksz8031, ksz8051,
* ksz8081, ksz8091,
@@ -609,6 +609,116 @@ err_force_master:
return result;
}
+#define KSZ9131_SKEW_5BIT_MAX 2400
+#define KSZ9131_SKEW_4BIT_MAX 800
+#define KSZ9131_OFFSET 700
+#define KSZ9131_STEP 100
+
+static int ksz9131_of_load_skew_values(struct phy_device *phydev,
+ struct device_node *of_node,
+ u16 reg, size_t field_sz,
+ char *field[], u8 numfields)
+{
+ int val[4] = {-(1 + KSZ9131_OFFSET), -(2 + KSZ9131_OFFSET),
+ -(3 + KSZ9131_OFFSET), -(4 + KSZ9131_OFFSET)};
+ int skewval, skewmax = 0;
+ int matches = 0;
+ u16 maxval;
+ u16 newval;
+ u16 mask;
+ int i;
+
+ /* psec properties in dts should mean x pico seconds */
+ if (field_sz == 5)
+ skewmax = KSZ9131_SKEW_5BIT_MAX;
+ else
+ skewmax = KSZ9131_SKEW_4BIT_MAX;
+
+ for (i = 0; i < numfields; i++)
+ if (!of_property_read_s32(of_node, field[i], &skewval)) {
+ if (skewval < -KSZ9131_OFFSET)
+ skewval = -KSZ9131_OFFSET;
+ else if (skewval > skewmax)
+ skewval = skewmax;
+
+ val[i] = skewval + KSZ9131_OFFSET;
+ matches++;
+ }
+
+ if (!matches)
+ return 0;
+
+ if (matches < numfields)
+ newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg);
+ else
+ newval = 0;
+
+ maxval = (field_sz == 4) ? 0xf : 0x1f;
+ for (i = 0; i < numfields; i++)
+ if (val[i] != -(i + 1 + KSZ9131_OFFSET)) {
+ mask = 0xffff;
+ mask ^= maxval << (field_sz * i);
+ newval = (newval & mask) |
+ (((val[i] / KSZ9131_STEP) & maxval)
+ << (field_sz * i));
+ }
+
+ return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval);
+}
+
+static int ksz9131_config_init(struct phy_device *phydev)
+{
+ const struct device *dev = &phydev->mdio.dev;
+ struct device_node *of_node = dev->of_node;
+ char *clk_skews[2] = {"rxc-skew-psec", "txc-skew-psec"};
+ char *rx_data_skews[4] = {
+ "rxd0-skew-psec", "rxd1-skew-psec",
+ "rxd2-skew-psec", "rxd3-skew-psec"
+ };
+ char *tx_data_skews[4] = {
+ "txd0-skew-psec", "txd1-skew-psec",
+ "txd2-skew-psec", "txd3-skew-psec"
+ };
+ char *control_skews[2] = {"txen-skew-psec", "rxdv-skew-psec"};
+ const struct device *dev_walker;
+ int ret;
+
+ dev_walker = &phydev->mdio.dev;
+ do {
+ of_node = dev_walker->of_node;
+ dev_walker = dev_walker->parent;
+ } while (!of_node && dev_walker);
+
+ if (!of_node)
+ return 0;
+
+ ret = ksz9131_of_load_skew_values(phydev, of_node,
+ MII_KSZ9031RN_CLK_PAD_SKEW, 5,
+ clk_skews, 2);
+ if (ret < 0)
+ return ret;
+
+ ret = ksz9131_of_load_skew_values(phydev, of_node,
+ MII_KSZ9031RN_CONTROL_PAD_SKEW, 4,
+ control_skews, 2);
+ if (ret < 0)
+ return ret;
+
+ ret = ksz9131_of_load_skew_values(phydev, of_node,
+ MII_KSZ9031RN_RX_DATA_PAD_SKEW, 4,
+ rx_data_skews, 4);
+ if (ret < 0)
+ return ret;
+
+ ret = ksz9131_of_load_skew_values(phydev, of_node,
+ MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
+ tx_data_skews, 4);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
#define KSZ8873MLL_GLOBAL_CONTROL_4 0x06
#define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX BIT(6)
#define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED BIT(4)
@@ -975,6 +1085,23 @@ static struct phy_driver ksphy_driver[] = {
.suspend = genphy_suspend,
.resume = kszphy_resume,
}, {
+ .phy_id = PHY_ID_KSZ9131,
+ .phy_id_mask = MICREL_PHY_ID_MASK,
+ .name = "Microchip KSZ9131 Gigabit PHY",
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .driver_data = &ksz9021_type,
+ .probe = kszphy_probe,
+ .config_init = ksz9131_config_init,
+ .read_status = ksz9031_read_status,
+ .ack_interrupt = kszphy_ack_interrupt,
+ .config_intr = kszphy_config_intr,
+ .get_sset_count = kszphy_get_sset_count,
+ .get_strings = kszphy_get_strings,
+ .get_stats = kszphy_get_stats,
+ .suspend = genphy_suspend,
+ .resume = kszphy_resume,
+}, {
.phy_id = PHY_ID_KSZ8873MLL,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8873MLL Switch",
@@ -1022,6 +1149,7 @@ MODULE_LICENSE("GPL");
static struct mdio_device_id __maybe_unused micrel_tbl[] = {
{ PHY_ID_KSZ9021, 0x000ffffe },
{ PHY_ID_KSZ9031, MICREL_PHY_ID_MASK },
+ { PHY_ID_KSZ9131, MICREL_PHY_ID_MASK },
{ PHY_ID_KSZ8001, 0x00fffffc },
{ PHY_ID_KS8737, MICREL_PHY_ID_MASK },
{ PHY_ID_KSZ8021, 0x00ffffff },
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index bffe077dc75f..a2e59f4f6f01 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -522,7 +522,7 @@ static int vsc85xx_mdix_set(struct phy_device *phydev, u8 mdix)
static int vsc85xx_downshift_get(struct phy_device *phydev, u8 *count)
{
- u16 reg_val;
+ int reg_val;
reg_val = phy_read_paged(phydev, MSCC_PHY_PAGE_EXTENDED,
MSCC_PHY_ACTIPHY_CNTL);
@@ -1292,7 +1292,7 @@ static int vsc8574_config_pre_init(struct phy_device *phydev)
dev_err(dev,
"%s: failed to assert reset of micro\n",
__func__);
- return ret;
+ goto out;
}
}
} else {
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index d887016e54b6..db633ae9f784 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1104,10 +1104,7 @@ static void team_port_disable_netpoll(struct team_port *port)
return;
port->np = NULL;
- /* Wait for transmitting packets to finish before freeing. */
- synchronize_rcu_bh();
- __netpoll_cleanup(np);
- kfree(np);
+ __netpoll_free(np);
}
#else
static int team_port_enable_netpoll(struct team_port *port)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3f5aa59c37b7..3e2c041d76ac 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2267,8 +2267,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
+ netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
- netif_tx_disable(vi->dev);
+ netif_tx_unlock_bh(vi->dev);
cancel_delayed_work_sync(&vi->refill);
if (netif_running(vi->dev)) {
@@ -2304,7 +2305,9 @@ static int virtnet_restore_up(struct virtio_device *vdev)
}
}
+ netif_tx_lock_bh(vi->dev);
netif_device_attach(vi->dev);
+ netif_tx_unlock_bh(vi->dev);
return err;
}
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 018406c4d944..297cdeaef479 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -103,22 +103,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
- if (ipa->sa.sa_family == AF_INET6)
- return ipv6_addr_any(&ipa->sin6.sin6_addr);
- else
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
- if (ipa->sa.sa_family == AF_INET6)
- return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
- else
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
-}
-
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
if (nla_len(nla) >= sizeof(struct in6_addr)) {
@@ -151,16 +135,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
-}
-
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
if (nla_len(nla) >= sizeof(struct in6_addr)) {
@@ -298,6 +272,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
ndm->ndm_state = fdb->state;
ndm->ndm_ifindex = vxlan->dev->ifindex;
ndm->ndm_flags = fdb->flags;
+ if (rdst->offloaded)
+ ndm->ndm_flags |= NTF_OFFLOADED;
ndm->ndm_type = RTN_UNICAST;
if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
@@ -353,8 +329,8 @@ static inline size_t vxlan_nlmsg_size(void)
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
-static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
- struct vxlan_rdst *rd, int type)
+static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd, int type)
{
struct net *net = dev_net(vxlan->dev);
struct sk_buff *skb;
@@ -379,6 +355,49 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
+static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
+ struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd,
+ bool adding)
+{
+ struct switchdev_notifier_vxlan_fdb_info info;
+ enum switchdev_notifier_type notifier_type;
+
+ if (WARN_ON(!rd))
+ return;
+
+ notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
+ : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
+
+ info = (struct switchdev_notifier_vxlan_fdb_info){
+ .remote_ip = rd->remote_ip,
+ .remote_port = rd->remote_port,
+ .remote_vni = rd->remote_vni,
+ .remote_ifindex = rd->remote_ifindex,
+ .vni = fdb->vni,
+ .offloaded = rd->offloaded,
+ };
+ memcpy(info.eth_addr, fdb->eth_addr, ETH_ALEN);
+
+ call_switchdev_notifiers(notifier_type, vxlan->dev,
+ &info.info);
+}
+
+static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd, int type)
+{
+ switch (type) {
+ case RTM_NEWNEIGH:
+ vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true);
+ break;
+ case RTM_DELNEIGH:
+ vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, false);
+ break;
+ }
+
+ __vxlan_fdb_notify(vxlan, fdb, rd, type);
+}
+
static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -488,6 +507,47 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
return NULL;
}
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ u8 eth_addr[ETH_ALEN + 2] = { 0 };
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ int rc = 0;
+
+ if (is_multicast_ether_addr(mac) ||
+ is_zero_ether_addr(mac))
+ return -EINVAL;
+
+ ether_addr_copy(eth_addr, mac);
+
+ rcu_read_lock();
+
+ f = __vxlan_find_mac(vxlan, eth_addr, vni);
+ if (!f) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ rdst = first_remote_rcu(f);
+
+ memset(fdb_info, 0, sizeof(*fdb_info));
+ fdb_info->info.dev = dev;
+ fdb_info->remote_ip = rdst->remote_ip;
+ fdb_info->remote_port = rdst->remote_port;
+ fdb_info->remote_vni = rdst->remote_vni;
+ fdb_info->remote_ifindex = rdst->remote_ifindex;
+ fdb_info->vni = vni;
+ fdb_info->offloaded = rdst->offloaded;
+ ether_addr_copy(fdb_info->eth_addr, mac);
+
+out:
+ rcu_read_unlock();
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
+
/* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port, __be32 vni,
@@ -533,6 +593,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
rd->remote_ip = *ip;
rd->remote_port = port;
+ rd->offloaded = false;
rd->remote_vni = vni;
rd->remote_ifindex = ifindex;
@@ -782,12 +843,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
bool do_notify)
{
+ struct vxlan_rdst *rd;
+
netdev_dbg(vxlan->dev,
"delete %pM\n", f->eth_addr);
--vxlan->addrcnt;
if (do_notify)
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+ list_for_each_entry(rd, &f->remotes, list)
+ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free);
@@ -2198,11 +2262,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ndst = &rt->dst;
- if (skb_dst(skb)) {
- int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
-
- skb_dst_update_pmtu(skb, mtu);
- }
+ skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2239,11 +2299,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto out_unlock;
}
- if (skb_dst(skb)) {
- int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
-
- skb_dst_update_pmtu(skb, mtu);
- }
+ skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
@@ -3761,6 +3817,51 @@ static struct notifier_block vxlan_notifier_block __read_mostly = {
.notifier_call = vxlan_netdevice_event,
};
+static void
+vxlan_fdb_offloaded_set(struct net_device *dev,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+
+ spin_lock_bh(&vxlan->hash_lock);
+
+ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ if (!f)
+ goto out;
+
+ rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
+ fdb_info->remote_port,
+ fdb_info->remote_vni,
+ fdb_info->remote_ifindex);
+ if (!rdst)
+ goto out;
+
+ rdst->offloaded = fdb_info->offloaded;
+
+out:
+ spin_unlock_bh(&vxlan->hash_lock);
+}
+
+static int vxlan_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case SWITCHDEV_VXLAN_FDB_OFFLOADED:
+ vxlan_fdb_offloaded_set(dev, ptr);
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
+ .notifier_call = vxlan_switchdev_event,
+};
+
static __net_init int vxlan_init_net(struct net *net)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -3834,11 +3935,17 @@ static int __init vxlan_init_module(void)
if (rc)
goto out2;
- rc = rtnl_link_register(&vxlan_link_ops);
+ rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
if (rc)
goto out3;
+ rc = rtnl_link_register(&vxlan_link_ops);
+ if (rc)
+ goto out4;
+
return 0;
+out4:
+ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
out3:
unregister_netdevice_notifier(&vxlan_notifier_block);
out2:
@@ -3851,6 +3958,7 @@ late_initcall(vxlan_init_module);
static void __exit vxlan_cleanup_module(void)
{
rtnl_link_unregister(&vxlan_link_ops);
+ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
unregister_netdevice_notifier(&vxlan_notifier_block);
unregister_pernet_subsys(&vxlan_net_ops);
/* rcu_barrier() is called by netns */
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dd8ec1dd9219..6bb9908bf46f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3143,8 +3143,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
}
mutex_lock(&ns->ctrl->subsys->lock);
- nvme_mpath_clear_current_path(ns);
list_del_rcu(&ns->siblings);
+ nvme_mpath_clear_current_path(ns);
mutex_unlock(&ns->ctrl->subsys->lock);
down_write(&ns->ctrl->namespaces_rwsem);
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 7f01f6f60b87..d0b7dd8fb184 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
unsigned int cpu = smp_processor_id();
- return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+ int ret;
+
+ ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+ if (ret && armpmu->filter_match)
+ return armpmu->filter_match(event);
+
+ return ret;
}
static ssize_t armpmu_cpumask_show(struct device *dev,
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 01b0e2bb3319..2012551d93e0 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -24,6 +24,8 @@
#include <linux/slab.h>
#include <linux/timekeeping.h>
+#include <linux/nospec.h>
+
#include "ptp_private.h"
static int ptp_disable_pinfunc(struct ptp_clock_info *ops,
@@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
err = -EINVAL;
break;
}
+ pin_index = array_index_nospec(pin_index, ops->n_pins);
if (mutex_lock_interruptible(&ptp->pincfg_mux))
return -ERESTARTSYS;
pd = ops->pin_config[pin_index];
@@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
err = -EINVAL;
break;
}
+ pin_index = array_index_nospec(pin_index, ops->n_pins);
if (mutex_lock_interruptible(&ptp->pincfg_mux))
return -ERESTARTSYS;
err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan);
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index bc03b0a690b4..9ede35cecb12 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -310,17 +310,17 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf)
if (difference & ACM_CTRL_DSR)
acm->iocount.dsr++;
- if (difference & ACM_CTRL_BRK)
- acm->iocount.brk++;
- if (difference & ACM_CTRL_RI)
- acm->iocount.rng++;
if (difference & ACM_CTRL_DCD)
acm->iocount.dcd++;
- if (difference & ACM_CTRL_FRAMING)
+ if (newctrl & ACM_CTRL_BRK)
+ acm->iocount.brk++;
+ if (newctrl & ACM_CTRL_RI)
+ acm->iocount.rng++;
+ if (newctrl & ACM_CTRL_FRAMING)
acm->iocount.frame++;
- if (difference & ACM_CTRL_PARITY)
+ if (newctrl & ACM_CTRL_PARITY)
acm->iocount.parity++;
- if (difference & ACM_CTRL_OVERRUN)
+ if (newctrl & ACM_CTRL_OVERRUN)
acm->iocount.overrun++;
spin_unlock_irqrestore(&acm->read_lock, flags);
@@ -355,7 +355,6 @@ static void acm_ctrl_irq(struct urb *urb)
case -ENOENT:
case -ESHUTDOWN:
/* this urb is terminated, clean up */
- acm->nb_index = 0;
dev_dbg(&acm->control->dev,
"%s - urb shutting down with status: %d\n",
__func__, status);
@@ -1642,6 +1641,7 @@ static int acm_pre_reset(struct usb_interface *intf)
struct acm *acm = usb_get_intfdata(intf);
clear_bit(EVENT_RX_STALL, &acm->flags);
+ acm->nb_index = 0; /* pending control transfers are lost */
return 0;
}
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 244417d0dfd1..ffccd40ea67d 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1474,8 +1474,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
u = 0;
switch (uurb->type) {
case USBDEVFS_URB_TYPE_CONTROL:
- if (is_in)
- allow_short = true;
if (!usb_endpoint_xfer_control(&ep->desc))
return -EINVAL;
/* min 8 byte setup packet */
@@ -1505,6 +1503,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
is_in = 0;
uurb->endpoint &= ~USB_DIR_IN;
}
+ if (is_in)
+ allow_short = true;
snoop(&ps->dev->dev, "control urb: bRequestType=%02x "
"bRequest=%02x wValue=%04x "
"wIndex=%04x wLength=%04x\n",
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index ca8a4b53c59f..1074cb82ec17 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -221,6 +221,8 @@
#include <linux/usb/gadget.h>
#include <linux/usb/composite.h>
+#include <linux/nospec.h>
+
#include "configfs.h"
@@ -3152,6 +3154,7 @@ static struct config_group *fsg_lun_make(struct config_group *group,
fsg_opts = to_fsg_opts(&group->cg_item);
if (num >= FSG_MAX_LUNS)
return ERR_PTR(-ERANGE);
+ num = array_index_nospec(num, FSG_MAX_LUNS);
mutex_lock(&fsg_opts->lock);
if (fsg_opts->refcnt || fsg_opts->common->luns[num]) {
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 722860eb5a91..51dd8e00c4f8 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -179,10 +179,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_PME_STUCK_QUIRK;
}
if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
- pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
+ pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)
xhci->quirks |= XHCI_SSIC_PORT_UNUSED;
+ if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+ (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI))
xhci->quirks |= XHCI_INTEL_USB_ROLE_SW;
- }
if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
(pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c
index 1fb3dd0f1dfa..277de96181f9 100644
--- a/drivers/usb/roles/intel-xhci-usb-role-switch.c
+++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c
@@ -161,6 +161,8 @@ static int intel_xhci_usb_remove(struct platform_device *pdev)
{
struct intel_xhci_usb_data *data = platform_get_drvdata(pdev);
+ pm_runtime_disable(&pdev->dev);
+
usb_role_switch_unregister(data->role_sw);
return 0;
}
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index d11f3f8dad40..1e592ec94ba4 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -318,8 +318,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
struct vhci_hcd *vhci_hcd;
struct vhci *vhci;
int retval = 0;
- int rhport;
+ int rhport = -1;
unsigned long flags;
+ bool invalid_rhport = false;
u32 prev_port_status[VHCI_HC_PORTS];
@@ -334,9 +335,19 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue,
wIndex);
- if (wIndex > VHCI_HC_PORTS)
- pr_err("invalid port number %d\n", wIndex);
- rhport = wIndex - 1;
+ /*
+ * wIndex can be 0 for some request types (typeReq). rhport is
+ * in valid range when wIndex >= 1 and < VHCI_HC_PORTS.
+ *
+ * Reference port_status[] only with valid rhport when
+ * invalid_rhport is false.
+ */
+ if (wIndex < 1 || wIndex > VHCI_HC_PORTS) {
+ invalid_rhport = true;
+ if (wIndex > VHCI_HC_PORTS)
+ pr_err("invalid port number %d\n", wIndex);
+ } else
+ rhport = wIndex - 1;
vhci_hcd = hcd_to_vhci_hcd(hcd);
vhci = vhci_hcd->vhci;
@@ -345,8 +356,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
/* store old status and compare now and old later */
if (usbip_dbg_flag_vhci_rh) {
- memcpy(prev_port_status, vhci_hcd->port_status,
- sizeof(prev_port_status));
+ if (!invalid_rhport)
+ memcpy(prev_port_status, vhci_hcd->port_status,
+ sizeof(prev_port_status));
}
switch (typeReq) {
@@ -354,8 +366,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
usbip_dbg_vhci_rh(" ClearHubFeature\n");
break;
case ClearPortFeature:
- if (rhport < 0)
+ if (invalid_rhport) {
+ pr_err("invalid port number %d\n", wIndex);
goto error;
+ }
switch (wValue) {
case USB_PORT_FEAT_SUSPEND:
if (hcd->speed == HCD_USB3) {
@@ -415,9 +429,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
break;
case GetPortStatus:
usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex);
- if (wIndex < 1) {
+ if (invalid_rhport) {
pr_err("invalid port number %d\n", wIndex);
retval = -EPIPE;
+ goto error;
}
/* we do not care about resume. */
@@ -513,16 +528,20 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
goto error;
}
- if (rhport < 0)
+ if (invalid_rhport) {
+ pr_err("invalid port number %d\n", wIndex);
goto error;
+ }
vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND;
break;
case USB_PORT_FEAT_POWER:
usbip_dbg_vhci_rh(
" SetPortFeature: USB_PORT_FEAT_POWER\n");
- if (rhport < 0)
+ if (invalid_rhport) {
+ pr_err("invalid port number %d\n", wIndex);
goto error;
+ }
if (hcd->speed == HCD_USB3)
vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER;
else
@@ -531,8 +550,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
case USB_PORT_FEAT_BH_PORT_RESET:
usbip_dbg_vhci_rh(
" SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n");
- if (rhport < 0)
+ if (invalid_rhport) {
+ pr_err("invalid port number %d\n", wIndex);
goto error;
+ }
/* Applicable only for USB3.0 hub */
if (hcd->speed != HCD_USB3) {
pr_err("USB_PORT_FEAT_BH_PORT_RESET req not "
@@ -543,8 +564,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
case USB_PORT_FEAT_RESET:
usbip_dbg_vhci_rh(
" SetPortFeature: USB_PORT_FEAT_RESET\n");
- if (rhport < 0)
+ if (invalid_rhport) {
+ pr_err("invalid port number %d\n", wIndex);
goto error;
+ }
/* if it's already enabled, disable */
if (hcd->speed == HCD_USB3) {
vhci_hcd->port_status[rhport] = 0;
@@ -565,8 +588,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
default:
usbip_dbg_vhci_rh(" SetPortFeature: default %d\n",
wValue);
- if (rhport < 0)
+ if (invalid_rhport) {
+ pr_err("invalid port number %d\n", wIndex);
goto error;
+ }
if (hcd->speed == HCD_USB3) {
if ((vhci_hcd->port_status[rhport] &
USB_SS_PORT_STAT_POWER) != 0) {
@@ -608,7 +633,7 @@ error:
if (usbip_dbg_flag_vhci_rh) {
pr_debug("port %d\n", rhport);
/* Only dump valid port status */
- if (rhport >= 0) {
+ if (!invalid_rhport) {
dump_port_status_diff(prev_port_status[rhport],
vhci_hcd->port_status[rhport],
hcd->speed == HCD_USB3);
@@ -618,8 +643,10 @@ error:
spin_unlock_irqrestore(&vhci->lock, flags);
- if ((vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0)
+ if (!invalid_rhport &&
+ (vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) {
usb_hcd_poll_rh_status(hcd);
+ }
return retval;
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 35f2ae30f31f..77a83790a31f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work)
}
if (call->state == AFS_CALL_COMPLETE) {
- call->reply[0] = NULL;
-
/* We have two refs to release - one from the alloc and one
* queued with the work item - and we can't just deallocate the
* call because the work item may be queued again.
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 2f306c0cc4ee..1d329e6981d5 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -199,11 +199,9 @@ static struct afs_server *afs_install_server(struct afs_net *net,
write_sequnlock(&net->fs_addr_lock);
ret = 0;
- goto out;
exists:
afs_get_server(server);
-out:
write_sequnlock(&net->fs_lock);
return server;
}
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index af2b17b21b94..95983c744164 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -343,7 +343,7 @@ try_again:
trap = lock_rename(cache->graveyard, dir);
/* do some checks before getting the grave dentry */
- if (rep->d_parent != dir) {
+ if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
/* the entry was probably culled when we dropped the parent dir
* lock */
unlock_rename(cache->graveyard, dir);
diff --git a/fs/dax.c b/fs/dax.c
index 4becbf168b7f..0fb270f0a0ef 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE),
indices)) {
+ pgoff_t nr_pages = 1;
+
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *pvec_ent = pvec.pages[i];
void *entry;
@@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
xa_lock_irq(&mapping->i_pages);
entry = get_unlocked_mapping_entry(mapping, index, NULL);
- if (entry)
+ if (entry) {
page = dax_busy_page(entry);
+ /*
+ * Account for multi-order entries at
+ * the end of the pagevec.
+ */
+ if (i + 1 >= pagevec_count(&pvec))
+ nr_pages = 1UL << dax_radix_order(entry);
+ }
put_unlocked_mapping_entry(mapping, index, entry);
xa_unlock_irq(&mapping->i_pages);
if (page)
@@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
*/
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- index++;
+ index += nr_pages;
if (page)
break;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index defc2168de91..f58c0cacc531 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb)
if (ops->ent_get(&fatent) == FAT_ENT_FREE)
free++;
} while (fat_ent_next(sbi, &fatent));
+ cond_resched();
}
sbi->free_clusters = free;
sbi->free_clus_valid = 1;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 83bfe04456b6..c550512ce335 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -70,20 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
}
/*
- * initialise an cookie jar slab element prior to any use
- */
-void fscache_cookie_init_once(void *_cookie)
-{
- struct fscache_cookie *cookie = _cookie;
-
- memset(cookie, 0, sizeof(*cookie));
- spin_lock_init(&cookie->lock);
- spin_lock_init(&cookie->stores_lock);
- INIT_HLIST_HEAD(&cookie->backing_objects);
-}
-
-/*
- * Set the index key in a cookie. The cookie struct has space for a 12-byte
+ * Set the index key in a cookie. The cookie struct has space for a 16-byte
* key plus length and hash, but if that's not big enough, it's instead a
* pointer to a buffer containing 3 bytes of hash, 1 byte of length and then
* the key data.
@@ -93,20 +80,18 @@ static int fscache_set_key(struct fscache_cookie *cookie,
{
unsigned long long h;
u32 *buf;
+ int bufs;
int i;
- cookie->key_len = index_key_len;
+ bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));
if (index_key_len > sizeof(cookie->inline_key)) {
- buf = kzalloc(index_key_len, GFP_KERNEL);
+ buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL);
if (!buf)
return -ENOMEM;
cookie->key = buf;
} else {
buf = (u32 *)cookie->inline_key;
- buf[0] = 0;
- buf[1] = 0;
- buf[2] = 0;
}
memcpy(buf, index_key, index_key_len);
@@ -116,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie,
*/
h = (unsigned long)cookie->parent;
h += index_key_len + cookie->type;
- for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++)
+
+ for (i = 0; i < bufs; i++)
h += buf[i];
cookie->key_hash = h ^ (h >> 32);
@@ -161,7 +147,7 @@ struct fscache_cookie *fscache_alloc_cookie(
struct fscache_cookie *cookie;
/* allocate and initialise a cookie */
- cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
+ cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
if (!cookie)
return NULL;
@@ -192,6 +178,9 @@ struct fscache_cookie *fscache_alloc_cookie(
cookie->netfs_data = netfs_data;
cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET);
cookie->type = def->type;
+ spin_lock_init(&cookie->lock);
+ spin_lock_init(&cookie->stores_lock);
+ INIT_HLIST_HEAD(&cookie->backing_objects);
/* radix tree insertion won't use the preallocation pool unless it's
* told it may not wait */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index f83328a7f048..d6209022e965 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object(
extern struct kmem_cache *fscache_cookie_jar;
extern void fscache_free_cookie(struct fscache_cookie *);
-extern void fscache_cookie_init_once(void *);
extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
const struct fscache_cookie_def *,
const void *, size_t,
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 7dce110bf17d..30ad89db1efc 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -143,9 +143,7 @@ static int __init fscache_init(void)
fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
sizeof(struct fscache_cookie),
- 0,
- 0,
- fscache_cookie_init_once);
+ 0, 0, NULL);
if (!fscache_cookie_jar) {
pr_notice("Failed to allocate a cookie jar\n");
ret = -ENOMEM;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c159a7f9a9e..84544a4f012d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -975,10 +975,6 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
{
struct gfs2_inode *ip = GFS2_I(inode);
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, inode->i_sb->s_blocksize,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
- }
gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
}
@@ -1061,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
}
}
release_metapath(&mp);
- if (gfs2_is_jdata(ip))
+ if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
iomap->page_done = gfs2_iomap_journaled_page_done;
return 0;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8e712b614e6e..933aac5da193 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl {
};
/* Lockdep class keys */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
+#endif
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
int new_level);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index bf000c8aeffb..fec62e9dfbe6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2337,8 +2337,8 @@ late_initcall(ubifs_init);
static void __exit ubifs_exit(void)
{
- WARN_ON(list_empty(&ubifs_infos));
- WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
+ WARN_ON(!list_empty(&ubifs_infos));
+ WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0);
dbg_debugfs_exit();
ubifs_compressors_exit();
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index da9d95a19580..1e713154f00e 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -153,6 +153,17 @@ struct __drm_planes_state {
struct __drm_crtcs_state {
struct drm_crtc *ptr;
struct drm_crtc_state *state, *old_state, *new_state;
+
+ /**
+ * @commit:
+ *
+ * A reference to the CRTC commit object that is kept for use by
+ * drm_atomic_helper_wait_for_flip_done() after
+ * drm_atomic_helper_commit_hw_done() is called. This ensures that a
+ * concurrent commit won't free a commit object that is still in use.
+ */
+ struct drm_crtc_commit *commit;
+
s32 __user *out_fence_ptr;
u64 last_vblank_count;
};
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index b25d12ef120a..e3c404833115 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -214,9 +214,9 @@ struct detailed_timing {
#define DRM_EDID_HDMI_DC_Y444 (1 << 3)
/* YCBCR 420 deep color modes */
-#define DRM_EDID_YCBCR420_DC_48 (1 << 6)
-#define DRM_EDID_YCBCR420_DC_36 (1 << 5)
-#define DRM_EDID_YCBCR420_DC_30 (1 << 4)
+#define DRM_EDID_YCBCR420_DC_48 (1 << 2)
+#define DRM_EDID_YCBCR420_DC_36 (1 << 1)
+#define DRM_EDID_YCBCR420_DC_30 (1 << 0)
#define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \
DRM_EDID_YCBCR420_DC_36 | \
DRM_EDID_YCBCR420_DC_30)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 99c19b06d9a4..fdcb45999b26 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned char *vec);
extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
+ pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 472fa4d4ea62..7361cd3fddc1 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -31,6 +31,7 @@
#define PHY_ID_KSZ8081 0x00221560
#define PHY_ID_KSZ8061 0x00221570
#define PHY_ID_KSZ9031 0x00221620
+#define PHY_ID_KSZ9131 0x00221640
#define PHY_ID_KSZ886X 0x00221430
#define PHY_ID_KSZ8863 0x00221435
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 0ef6138eca49..31a750570c38 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -61,6 +61,7 @@ struct mlx5_core_cq {
int reset_notify_added;
struct list_head reset_notify;
struct mlx5_eq *eq;
+ u16 uid;
};
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index e9b502d5bcc1..b4c0457fbebd 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1124,6 +1124,12 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \
MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap)
+#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \
+ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap)
+
+#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \
+ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap)
+
#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4b75796cac23..31460eeb6fe0 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -133,6 +133,7 @@ enum {
MLX5_REG_PVLC = 0x500f,
MLX5_REG_PCMR = 0x5041,
MLX5_REG_PMLP = 0x5002,
+ MLX5_REG_PPLM = 0x5023,
MLX5_REG_PCAM = 0x507f,
MLX5_REG_NODE_DESC = 0x6001,
MLX5_REG_HOST_ENDIANNESS = 0x7004,
@@ -163,10 +164,7 @@ enum mlx5_dcbx_oper_mode {
};
enum mlx5_dct_atomic_mode {
- MLX5_ATOMIC_MODE_DCT_OFF = 20,
- MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF,
- MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF,
- MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF,
+ MLX5_ATOMIC_MODE_DCT_CX = 2,
};
enum {
@@ -360,7 +358,7 @@ struct mlx5_frag_buf {
};
struct mlx5_frag_buf_ctrl {
- struct mlx5_frag_buf frag_buf;
+ struct mlx5_buf_list *frags;
u32 sz_m1;
u16 frag_sz_m1;
u16 strides_offset;
@@ -477,6 +475,7 @@ struct mlx5_core_srq {
atomic_t refcount;
struct completion free;
+ u16 uid;
};
struct mlx5_eq_table {
@@ -996,10 +995,12 @@ static inline u32 mlx5_base_mkey(const u32 key)
return key & 0xffffff00u;
}
-static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
+static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags,
+ u8 log_stride, u8 log_sz,
u16 strides_offset,
struct mlx5_frag_buf_ctrl *fbc)
{
+ fbc->frags = frags;
fbc->log_stride = log_stride;
fbc->log_sz = log_sz;
fbc->sz_m1 = (1 << fbc->log_sz) - 1;
@@ -1008,18 +1009,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
fbc->strides_offset = strides_offset;
}
-static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+static inline void mlx5_init_fbc(struct mlx5_buf_list *frags,
+ u8 log_stride, u8 log_sz,
struct mlx5_frag_buf_ctrl *fbc)
{
- mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc);
-}
-
-static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
- void *cqc)
-{
- mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz),
- MLX5_GET(cqc, cqc, log_cq_size),
- fbc);
+ mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc);
}
static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
@@ -1030,8 +1024,15 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
ix += fbc->strides_offset;
frag = ix >> fbc->log_frag_strides;
- return fbc->frag_buf.frags[frag].buf +
- ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+ return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
+static inline u32
+mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
+{
+ u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
+
+ return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
}
int mlx5_cmd_init(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 804516e4f483..5660f07d3be0 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -45,7 +45,8 @@ enum {
};
enum {
- MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0),
+ MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0),
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
};
#define LEFTOVERS_RULE_NUM 2
@@ -91,7 +92,7 @@ struct mlx5_flow_destination {
u32 tir_num;
u32 ft_num;
struct mlx5_flow_table *ft;
- struct mlx5_fc *counter;
+ u32 counter_id;
struct {
u16 num;
u16 vhca_id;
@@ -101,6 +102,8 @@ struct mlx5_flow_destination {
};
struct mlx5_flow_namespace *
+mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n);
+struct mlx5_flow_namespace *
mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type);
struct mlx5_flow_namespace *
@@ -155,20 +158,28 @@ struct mlx5_fs_vlan {
#define MLX5_FS_VLAN_DEPTH 2
+enum {
+ FLOW_ACT_HAS_TAG = BIT(0),
+ FLOW_ACT_NO_APPEND = BIT(1),
+};
+
struct mlx5_flow_act {
u32 action;
- bool has_flow_tag;
u32 flow_tag;
- u32 encap_id;
+ u32 reformat_id;
u32 modify_id;
uintptr_t esp_id;
+ u32 flags;
struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
struct ib_counters *counters;
};
#define MLX5_DECLARE_FLOW_ACT(name) \
- struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
- MLX5_FS_DEFAULT_FLOW_TAG, 0, 0}
+ struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
+ .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \
+ .reformat_id = 0, \
+ .modify_id = 0, \
+ .flags = 0, }
/* Single destination per rule.
* Group ID is implied by the match criteria.
@@ -185,15 +196,30 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
struct mlx5_flow_destination *new_dest,
struct mlx5_flow_destination *old_dest);
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler);
struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
void mlx5_fc_query_cached(struct mlx5_fc *counter,
u64 *bytes, u64 *packets, u64 *lastuse);
int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
u64 *packets, u64 *bytes);
+u32 mlx5_fc_id(struct mlx5_fc *counter);
int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 namespace, u8 num_actions,
+ void *modify_actions, u32 *modify_header_id);
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+ u32 modify_header_id);
+
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ u32 *packet_reformat_id);
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+ u32 packet_reformat_id);
+
#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6e8a882052b1..dbff9ff28f2c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -243,8 +243,8 @@ enum {
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a,
MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c,
- MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d,
- MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
@@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 modify_root[0x1];
u8 identified_miss_table_mode[0x1];
u8 flow_table_modify[0x1];
- u8 encap[0x1];
+ u8 reformat[0x1];
u8 decap[0x1];
u8 reserved_at_9[0x1];
u8 pop_vlan[0x1];
@@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reserved_at_c[0x1];
u8 pop_vlan_2[0x1];
u8 push_vlan_2[0x1];
- u8 reserved_at_f[0x11];
-
+ u8 reformat_and_vlan_action[0x1];
+ u8 reserved_at_10[0x2];
+ u8 reformat_l3_tunnel_to_l2[0x1];
+ u8 reformat_l2_to_l3_tunnel[0x1];
+ u8 reformat_and_modify_action[0x1];
+ u8 reserved_at_14[0xb];
u8 reserved_at_20[0x2];
u8 log_max_ft_size[0x6];
u8 log_max_modify_header_context[0x8];
@@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 nic_rx_multi_path_tirs[0x1];
u8 nic_rx_multi_path_tirs_fts[0x1];
u8 allow_sniffer_and_nic_rx_shared_tir[0x1];
- u8 reserved_at_3[0x1fd];
+ u8 reserved_at_3[0x1d];
+ u8 encap_general_header[0x1];
+ u8 reserved_at_21[0xa];
+ u8 log_max_packet_reformat_context[0x5];
+ u8 reserved_at_30[0x6];
+ u8 max_encap_header_size[0xa];
+ u8 reserved_at_40[0x1c0];
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
@@ -574,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
struct mlx5_ifc_flow_table_eswitch_cap_bits {
u8 reserved_at_0[0x1c];
u8 fdb_multi_path_to_table[0x1];
- u8 reserved_at_1d[0x1e3];
+ u8 reserved_at_1d[0x1];
+ u8 multi_fdb_encap[0x1];
+ u8 reserved_at_1e[0x1e1];
struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
@@ -599,7 +611,7 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vxlan_encap_decap[0x1];
u8 nvgre_encap_decap[0x1];
u8 reserved_at_22[0x9];
- u8 log_max_encap_headers[0x5];
+ u8 log_max_packet_reformat_context[0x5];
u8 reserved_2b[0x6];
u8 max_encap_header_size[0xa];
@@ -996,7 +1008,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 umr_modify_atomic_disabled[0x1];
u8 umr_indirect_mkey_disabled[0x1];
u8 umr_fence[0x2];
- u8 reserved_at_20c[0x3];
+ u8 dc_req_scat_data_cqe[0x1];
+ u8 reserved_at_20d[0x2];
u8 drain_sigerr[0x1];
u8 cmdif_checksum[0x2];
u8 sigerr_cqe[0x1];
@@ -1281,7 +1294,9 @@ struct mlx5_ifc_wq_bits {
u8 reserved_at_118[0x3];
u8 log_wq_sz[0x5];
- u8 reserved_at_120[0x3];
+ u8 dbr_umem_valid[0x1];
+ u8 wq_umem_valid[0x1];
+ u8 reserved_at_122[0x1];
u8 log_hairpin_num_packets[0x5];
u8 reserved_at_128[0x3];
u8 log_hairpin_data_sz[0x5];
@@ -2355,7 +2370,10 @@ struct mlx5_ifc_qpc_bits {
u8 dc_access_key[0x40];
- u8 reserved_at_680[0xc0];
+ u8 reserved_at_680[0x3];
+ u8 dbr_umem_valid[0x1];
+
+ u8 reserved_at_684[0xbc];
};
struct mlx5_ifc_roce_addr_layout_bits {
@@ -2395,7 +2413,7 @@ enum {
MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4,
MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8,
- MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10,
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10,
MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20,
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40,
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80,
@@ -2428,7 +2446,7 @@ struct mlx5_ifc_flow_context_bits {
u8 reserved_at_a0[0x8];
u8 flow_counter_list_size[0x18];
- u8 encap_id[0x20];
+ u8 packet_reformat_id[0x20];
u8 modify_header_id[0x20];
@@ -2455,7 +2473,7 @@ struct mlx5_ifc_xrc_srqc_bits {
u8 wq_signature[0x1];
u8 cont_srq[0x1];
- u8 reserved_at_22[0x1];
+ u8 dbr_umem_valid[0x1];
u8 rlky[0x1];
u8 basic_cyclic_rcv_wqe[0x1];
u8 log_rq_stride[0x3];
@@ -2550,8 +2568,8 @@ enum {
};
enum {
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST = 0x1,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST = 0x2,
};
struct mlx5_ifc_tirc_bits {
@@ -3119,7 +3137,9 @@ enum {
struct mlx5_ifc_cqc_bits {
u8 status[0x4];
- u8 reserved_at_4[0x4];
+ u8 reserved_at_4[0x2];
+ u8 dbr_umem_valid[0x1];
+ u8 reserved_at_7[0x1];
u8 cqe_sz[0x3];
u8 cc[0x1];
u8 reserved_at_c[0x1];
@@ -3386,7 +3406,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits {
struct mlx5_ifc_sqerr2rts_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -3416,7 +3436,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits {
struct mlx5_ifc_sqd2rts_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -3621,7 +3641,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits {
struct mlx5_ifc_rts2rts_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -3651,7 +3671,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits {
struct mlx5_ifc_rtr2rts_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -3681,7 +3701,7 @@ struct mlx5_ifc_rst2init_qp_out_bits {
struct mlx5_ifc_rst2init_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -4804,19 +4824,19 @@ struct mlx5_ifc_query_eq_in_bits {
u8 reserved_at_60[0x20];
};
-struct mlx5_ifc_encap_header_in_bits {
+struct mlx5_ifc_packet_reformat_context_in_bits {
u8 reserved_at_0[0x5];
- u8 header_type[0x3];
+ u8 reformat_type[0x3];
u8 reserved_at_8[0xe];
- u8 encap_header_size[0xa];
+ u8 reformat_data_size[0xa];
u8 reserved_at_20[0x10];
- u8 encap_header[2][0x8];
+ u8 reformat_data[2][0x8];
- u8 more_encap_header[0][0x8];
+ u8 more_reformat_data[0][0x8];
};
-struct mlx5_ifc_query_encap_header_out_bits {
+struct mlx5_ifc_query_packet_reformat_context_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -4824,33 +4844,41 @@ struct mlx5_ifc_query_encap_header_out_bits {
u8 reserved_at_40[0xa0];
- struct mlx5_ifc_encap_header_in_bits encap_header[0];
+ struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0];
};
-struct mlx5_ifc_query_encap_header_in_bits {
+struct mlx5_ifc_query_packet_reformat_context_in_bits {
u8 opcode[0x10];
u8 reserved_at_10[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 encap_id[0x20];
+ u8 packet_reformat_id[0x20];
u8 reserved_at_60[0xa0];
};
-struct mlx5_ifc_alloc_encap_header_out_bits {
+struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
- u8 encap_id[0x20];
+ u8 packet_reformat_id[0x20];
u8 reserved_at_60[0x20];
};
-struct mlx5_ifc_alloc_encap_header_in_bits {
+enum {
+ MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
+ MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
+ MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
+ MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
+};
+
+struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
u8 opcode[0x10];
u8 reserved_at_10[0x10];
@@ -4859,10 +4887,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits {
u8 reserved_at_40[0xa0];
- struct mlx5_ifc_encap_header_in_bits encap_header;
+ struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context;
};
-struct mlx5_ifc_dealloc_encap_header_out_bits {
+struct mlx5_ifc_dealloc_packet_reformat_context_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -4871,14 +4899,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits {
u8 reserved_at_40[0x40];
};
-struct mlx5_ifc_dealloc_encap_header_in_bits {
+struct mlx5_ifc_dealloc_packet_reformat_context_in_bits {
u8 opcode[0x10];
u8 reserved_at_10[0x10];
u8 reserved_20[0x10];
u8 op_mod[0x10];
- u8 encap_id[0x20];
+ u8 packet_reformat_id[0x20];
u8 reserved_60[0x20];
};
@@ -5176,7 +5204,7 @@ struct mlx5_ifc_qp_2rst_out_bits {
struct mlx5_ifc_qp_2rst_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5198,7 +5226,7 @@ struct mlx5_ifc_qp_2err_out_bits {
struct mlx5_ifc_qp_2err_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5298,7 +5326,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits {
struct mlx5_ifc_modify_tis_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5337,7 +5365,7 @@ struct mlx5_ifc_modify_tir_out_bits {
struct mlx5_ifc_modify_tir_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5365,7 +5393,7 @@ struct mlx5_ifc_modify_sq_out_bits {
struct mlx5_ifc_modify_sq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5438,7 +5466,7 @@ struct mlx5_ifc_rqt_bitmask_bits {
struct mlx5_ifc_modify_rqt_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5472,7 +5500,7 @@ enum {
struct mlx5_ifc_modify_rq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5508,7 +5536,7 @@ struct mlx5_ifc_rmp_bitmask_bits {
struct mlx5_ifc_modify_rmp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5613,7 +5641,7 @@ enum {
struct mlx5_ifc_modify_cq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5625,7 +5653,10 @@ struct mlx5_ifc_modify_cq_in_bits {
struct mlx5_ifc_cqc_bits cq_context;
- u8 reserved_at_280[0x600];
+ u8 reserved_at_280[0x40];
+
+ u8 cq_umem_valid[0x1];
+ u8 reserved_at_2c1[0x5bf];
u8 pas[0][0x40];
};
@@ -5773,7 +5804,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits {
struct mlx5_ifc_init2rtr_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5803,7 +5834,7 @@ struct mlx5_ifc_init2init_qp_out_bits {
struct mlx5_ifc_init2init_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5902,7 +5933,7 @@ struct mlx5_ifc_drain_dct_out_bits {
struct mlx5_ifc_drain_dct_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5946,7 +5977,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits {
struct mlx5_ifc_detach_from_mcg_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5970,7 +6001,7 @@ struct mlx5_ifc_destroy_xrq_out_bits {
struct mlx5_ifc_destroy_xrq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -5992,7 +6023,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits {
struct mlx5_ifc_destroy_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6014,7 +6045,7 @@ struct mlx5_ifc_destroy_tis_out_bits {
struct mlx5_ifc_destroy_tis_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6036,7 +6067,7 @@ struct mlx5_ifc_destroy_tir_out_bits {
struct mlx5_ifc_destroy_tir_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6058,7 +6089,7 @@ struct mlx5_ifc_destroy_srq_out_bits {
struct mlx5_ifc_destroy_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6080,7 +6111,7 @@ struct mlx5_ifc_destroy_sq_out_bits {
struct mlx5_ifc_destroy_sq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6126,7 +6157,7 @@ struct mlx5_ifc_destroy_rqt_out_bits {
struct mlx5_ifc_destroy_rqt_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6148,7 +6179,7 @@ struct mlx5_ifc_destroy_rq_out_bits {
struct mlx5_ifc_destroy_rq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6192,7 +6223,7 @@ struct mlx5_ifc_destroy_rmp_out_bits {
struct mlx5_ifc_destroy_rmp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6214,7 +6245,7 @@ struct mlx5_ifc_destroy_qp_out_bits {
struct mlx5_ifc_destroy_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6366,7 +6397,7 @@ struct mlx5_ifc_destroy_dct_out_bits {
struct mlx5_ifc_destroy_dct_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6388,7 +6419,7 @@ struct mlx5_ifc_destroy_cq_out_bits {
struct mlx5_ifc_destroy_cq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6491,7 +6522,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits {
struct mlx5_ifc_dealloc_xrcd_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6579,7 +6610,7 @@ struct mlx5_ifc_dealloc_pd_out_bits {
struct mlx5_ifc_dealloc_pd_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6625,7 +6656,7 @@ struct mlx5_ifc_create_xrq_out_bits {
struct mlx5_ifc_create_xrq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6649,7 +6680,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits {
struct mlx5_ifc_create_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6658,7 +6689,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
- u8 reserved_at_280[0x600];
+ u8 reserved_at_280[0x40];
+ u8 xrc_srq_umem_valid[0x1];
+ u8 reserved_at_2c1[0x5bf];
u8 pas[0][0x40];
};
@@ -6677,7 +6710,7 @@ struct mlx5_ifc_create_tis_out_bits {
struct mlx5_ifc_create_tis_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6701,7 +6734,7 @@ struct mlx5_ifc_create_tir_out_bits {
struct mlx5_ifc_create_tir_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6725,7 +6758,7 @@ struct mlx5_ifc_create_srq_out_bits {
struct mlx5_ifc_create_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6753,7 +6786,7 @@ struct mlx5_ifc_create_sq_out_bits {
struct mlx5_ifc_create_sq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6807,7 +6840,7 @@ struct mlx5_ifc_create_rqt_out_bits {
struct mlx5_ifc_create_rqt_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6831,7 +6864,7 @@ struct mlx5_ifc_create_rq_out_bits {
struct mlx5_ifc_create_rq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6855,7 +6888,7 @@ struct mlx5_ifc_create_rmp_out_bits {
struct mlx5_ifc_create_rmp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6879,7 +6912,7 @@ struct mlx5_ifc_create_qp_out_bits {
struct mlx5_ifc_create_qp_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -6892,7 +6925,10 @@ struct mlx5_ifc_create_qp_in_bits {
struct mlx5_ifc_qpc_bits qpc;
- u8 reserved_at_800[0x80];
+ u8 reserved_at_800[0x60];
+
+ u8 wq_umem_valid[0x1];
+ u8 reserved_at_861[0x1f];
u8 pas[0][0x40];
};
@@ -6954,7 +6990,8 @@ struct mlx5_ifc_create_mkey_in_bits {
u8 reserved_at_40[0x20];
u8 pg_access[0x1];
- u8 reserved_at_61[0x1f];
+ u8 mkey_umem_valid[0x1];
+ u8 reserved_at_62[0x1e];
struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
@@ -6980,7 +7017,7 @@ struct mlx5_ifc_create_flow_table_out_bits {
};
struct mlx5_ifc_flow_table_context_bits {
- u8 encap_en[0x1];
+ u8 reformat_en[0x1];
u8 decap_en[0x1];
u8 reserved_at_2[0x2];
u8 table_miss_action[0x4];
@@ -7122,7 +7159,7 @@ struct mlx5_ifc_create_dct_out_bits {
struct mlx5_ifc_create_dct_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7148,7 +7185,7 @@ struct mlx5_ifc_create_cq_out_bits {
struct mlx5_ifc_create_cq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7157,7 +7194,10 @@ struct mlx5_ifc_create_cq_in_bits {
struct mlx5_ifc_cqc_bits cq_context;
- u8 reserved_at_280[0x600];
+ u8 reserved_at_280[0x60];
+
+ u8 cq_umem_valid[0x1];
+ u8 reserved_at_2e1[0x59f];
u8 pas[0][0x40];
};
@@ -7205,7 +7245,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits {
struct mlx5_ifc_attach_to_mcg_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7256,7 +7296,7 @@ enum {
struct mlx5_ifc_arm_xrc_srq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7284,7 +7324,7 @@ enum {
struct mlx5_ifc_arm_rq_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7332,7 +7372,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits {
struct mlx5_ifc_alloc_xrcd_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7420,7 +7460,7 @@ struct mlx5_ifc_alloc_pd_out_bits {
struct mlx5_ifc_alloc_pd_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -7788,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits {
struct mlx5_ifc_pplm_reg_bits {
u8 reserved_at_0[0x8];
- u8 local_port[0x8];
- u8 reserved_at_10[0x10];
+ u8 local_port[0x8];
+ u8 reserved_at_10[0x10];
- u8 reserved_at_20[0x20];
+ u8 reserved_at_20[0x20];
- u8 port_profile_mode[0x8];
- u8 static_port_profile[0x8];
- u8 active_port_profile[0x8];
- u8 reserved_at_58[0x8];
+ u8 port_profile_mode[0x8];
+ u8 static_port_profile[0x8];
+ u8 active_port_profile[0x8];
+ u8 reserved_at_58[0x8];
- u8 retransmission_active[0x8];
- u8 fec_mode_active[0x18];
+ u8 retransmission_active[0x8];
+ u8 fec_mode_active[0x18];
- u8 reserved_at_80[0x20];
+ u8 rs_fec_correction_bypass_cap[0x4];
+ u8 reserved_at_84[0x8];
+ u8 fec_override_cap_56g[0x4];
+ u8 fec_override_cap_100g[0x4];
+ u8 fec_override_cap_50g[0x4];
+ u8 fec_override_cap_25g[0x4];
+ u8 fec_override_cap_10g_40g[0x4];
+
+ u8 rs_fec_correction_bypass_admin[0x4];
+ u8 reserved_at_a4[0x8];
+ u8 fec_override_admin_56g[0x4];
+ u8 fec_override_admin_100g[0x4];
+ u8 fec_override_admin_50g[0x4];
+ u8 fec_override_admin_25g[0x4];
+ u8 fec_override_admin_10g_40g[0x4];
};
struct mlx5_ifc_ppcnt_reg_bits {
@@ -8086,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
u8 rx_icrc_encapsulated_counter[0x1];
u8 reserved_at_6e[0x8];
u8 pfcc_mask[0x1];
- u8 reserved_at_77[0x4];
+ u8 reserved_at_77[0x3];
+ u8 per_lane_error_counters[0x1];
u8 rx_buffer_fullness_counters[0x1];
u8 ptys_connector_type[0x1];
u8 reserved_at_7d[0x1];
@@ -8097,7 +8152,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
u8 port_access_reg_cap_mask_127_to_96[0x20];
u8 port_access_reg_cap_mask_95_to_64[0x20];
- u8 port_access_reg_cap_mask_63_to_32[0x20];
+
+ u8 port_access_reg_cap_mask_63_to_36[0x1c];
+ u8 pplm[0x1];
+ u8 port_access_reg_cap_mask_34_to_32[0x3];
u8 port_access_reg_cap_mask_31_to_13[0x13];
u8 pbmc[0x1];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 4778d41085d4..fbe322c966bc 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -471,6 +471,7 @@ struct mlx5_core_qp {
int qpn;
struct mlx5_rsc_debug *dbg;
int pid;
+ u16 uid;
};
struct mlx5_core_dct {
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index 24ff23e27c8a..1b1f3c20c6a3 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -61,6 +61,7 @@ struct mlx5_srq_attr {
u32 tm_next_tag;
u32 tm_hw_phase_cnt;
u32 tm_sw_phase_cnt;
+ u16 uid;
};
struct mlx5_core_dev;
diff --git a/include/linux/module.h b/include/linux/module.h
index f807f15bebbe..e19ae08c7fb8 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -20,6 +20,7 @@
#include <linux/export.h>
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
+#include <linux/tracepoint-defs.h>
#include <linux/percpu.h>
#include <asm/module.h>
@@ -430,7 +431,7 @@ struct module {
#ifdef CONFIG_TRACEPOINTS
unsigned int num_tracepoints;
- struct tracepoint * const *tracepoints_ptrs;
+ tracepoint_ptr_t *tracepoints_ptrs;
#endif
#ifdef HAVE_JUMP_LABEL
struct jump_entry *jump_entries;
diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
index ecf7dab81e9e..c6000046c966 100644
--- a/include/linux/netfilter/nfnetlink_osf.h
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
const struct list_head *nf_osf_fingers);
const char *nf_osf_find(const struct sk_buff *skb,
- const struct list_head *nf_osf_fingers);
+ const struct list_head *nf_osf_fingers,
+ const int ttl_check);
#endif /* _NFOSF_H */
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 3ef82d3a78db..676f1ff161a9 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -31,8 +31,6 @@ struct netpoll {
bool ipv6;
u16 local_port, remote_port;
u8 remote_mac[ETH_ALEN];
-
- struct work_struct cleanup_work;
};
struct netpoll_info {
@@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt);
int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
int netpoll_setup(struct netpoll *np);
void __netpoll_cleanup(struct netpoll *np);
-void __netpoll_free_async(struct netpoll *np);
+void __netpoll_free(struct netpoll *np);
void netpoll_cleanup(struct netpoll *np);
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
struct net_device *dev);
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 10f92e1d8e7b..bf309ff6f244 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -99,6 +99,7 @@ struct arm_pmu {
void (*stop)(struct arm_pmu *);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
+ int (*filter_match)(struct perf_event *event);
int num_events;
bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index dee3c9c744f7..a47321a0d572 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -667,15 +667,35 @@ enum qed_link_mode_bits {
QED_LM_Autoneg_BIT = BIT(1),
QED_LM_Asym_Pause_BIT = BIT(2),
QED_LM_Pause_BIT = BIT(3),
- QED_LM_1000baseT_Half_BIT = BIT(4),
- QED_LM_1000baseT_Full_BIT = BIT(5),
+ QED_LM_1000baseT_Full_BIT = BIT(4),
+ QED_LM_10000baseT_Full_BIT = BIT(5),
QED_LM_10000baseKR_Full_BIT = BIT(6),
QED_LM_20000baseKR2_Full_BIT = BIT(7),
QED_LM_25000baseKR_Full_BIT = BIT(8),
QED_LM_40000baseLR4_Full_BIT = BIT(9),
QED_LM_50000baseKR2_Full_BIT = BIT(10),
QED_LM_100000baseKR4_Full_BIT = BIT(11),
- QED_LM_COUNT = 11
+ QED_LM_2500baseX_Full_BIT = BIT(12),
+ QED_LM_Backplane_BIT = BIT(13),
+ QED_LM_1000baseKX_Full_BIT = BIT(14),
+ QED_LM_10000baseKX4_Full_BIT = BIT(15),
+ QED_LM_10000baseR_FEC_BIT = BIT(16),
+ QED_LM_40000baseKR4_Full_BIT = BIT(17),
+ QED_LM_40000baseCR4_Full_BIT = BIT(18),
+ QED_LM_40000baseSR4_Full_BIT = BIT(19),
+ QED_LM_25000baseCR_Full_BIT = BIT(20),
+ QED_LM_25000baseSR_Full_BIT = BIT(21),
+ QED_LM_50000baseCR2_Full_BIT = BIT(22),
+ QED_LM_100000baseSR4_Full_BIT = BIT(23),
+ QED_LM_100000baseCR4_Full_BIT = BIT(24),
+ QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25),
+ QED_LM_50000baseSR2_Full_BIT = BIT(26),
+ QED_LM_1000baseX_Full_BIT = BIT(27),
+ QED_LM_10000baseCR_Full_BIT = BIT(28),
+ QED_LM_10000baseSR_Full_BIT = BIT(29),
+ QED_LM_10000baseLR_Full_BIT = BIT(30),
+ QED_LM_10000baseLRM_Full_BIT = BIT(31),
+ QED_LM_COUNT = 32
};
struct qed_link_params {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 119d092c6b13..0ba687454267 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
#define __it(x, op) (x -= sizeof(u##op))
#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
case 32: diffs |= __it_diff(a, b, 64);
+ /* fall through */
case 24: diffs |= __it_diff(a, b, 64);
+ /* fall through */
case 16: diffs |= __it_diff(a, b, 64);
+ /* fall through */
case 8: diffs |= __it_diff(a, b, 64);
break;
case 28: diffs |= __it_diff(a, b, 64);
+ /* fall through */
case 20: diffs |= __it_diff(a, b, 64);
+ /* fall through */
case 12: diffs |= __it_diff(a, b, 64);
+ /* fall through */
case 4: diffs |= __it_diff(a, b, 32);
break;
}
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 22c5a46e9693..49ba9cde7e4b 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -35,6 +35,12 @@ struct tracepoint {
struct tracepoint_func __rcu *funcs;
};
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef const int tracepoint_ptr_t;
+#else
+typedef struct tracepoint * const tracepoint_ptr_t;
+#endif
+
struct bpf_raw_event_map {
struct tracepoint *tp;
void *bpf_func;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 041f7e56a289..538ba1a58f5b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -99,6 +99,29 @@ extern void syscall_unregfunc(void);
#define TRACE_DEFINE_ENUM(x)
#define TRACE_DEFINE_SIZEOF(x)
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+{
+ return offset_to_ptr(p);
+}
+
+#define __TRACEPOINT_ENTRY(name) \
+ asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \
+ " .balign 4 \n" \
+ " .long __tracepoint_" #name " - . \n" \
+ " .previous \n")
+#else
+static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+{
+ return *p;
+}
+
+#define __TRACEPOINT_ENTRY(name) \
+ static tracepoint_ptr_t __tracepoint_ptr_##name __used \
+ __attribute__((section("__tracepoints_ptrs"))) = \
+ &__tracepoint_##name
+#endif
+
#endif /* _LINUX_TRACEPOINT_H */
/*
@@ -253,19 +276,6 @@ extern void syscall_unregfunc(void);
return static_key_false(&__tracepoint_##name.key); \
}
-#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-#define __TRACEPOINT_ENTRY(name) \
- asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \
- " .balign 4 \n" \
- " .long __tracepoint_" #name " - . \n" \
- " .previous \n")
-#else
-#define __TRACEPOINT_ENTRY(name) \
- static struct tracepoint * const __tracepoint_ptr_##name __used \
- __attribute__((section("__tracepoints_ptrs"))) = \
- &__tracepoint_##name
-#endif
-
/*
* We have no guarantee that gcc and the linker won't up-align the tracepoint
* structures, so we create an array of pointers that will be used for iteration
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 3555440e14fc..093aedebdf0c 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -277,12 +277,19 @@ struct l2cap_conn_rsp {
#define L2CAP_CR_SEC_BLOCK 0x0003
#define L2CAP_CR_NO_MEM 0x0004
#define L2CAP_CR_BAD_AMP 0x0005
-#define L2CAP_CR_AUTHENTICATION 0x0005
-#define L2CAP_CR_AUTHORIZATION 0x0006
-#define L2CAP_CR_BAD_KEY_SIZE 0x0007
-#define L2CAP_CR_ENCRYPTION 0x0008
-#define L2CAP_CR_INVALID_SCID 0x0009
-#define L2CAP_CR_SCID_IN_USE 0x000A
+#define L2CAP_CR_INVALID_SCID 0x0006
+#define L2CAP_CR_SCID_IN_USE 0x0007
+
+/* credit based connect results */
+#define L2CAP_CR_LE_SUCCESS 0x0000
+#define L2CAP_CR_LE_BAD_PSM 0x0002
+#define L2CAP_CR_LE_NO_MEM 0x0004
+#define L2CAP_CR_LE_AUTHENTICATION 0x0005
+#define L2CAP_CR_LE_AUTHORIZATION 0x0006
+#define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007
+#define L2CAP_CR_LE_ENCRYPTION 0x0008
+#define L2CAP_CR_LE_INVALID_SCID 0x0009
+#define L2CAP_CR_LE_SCID_IN_USE 0X000A
/* connect/create channel status */
#define L2CAP_CS_NO_INFO 0x0000
diff --git a/include/net/dst.h b/include/net/dst.h
index 7f735e76ca73..6cf0870414c7 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
dst->ops->update_pmtu(dst, NULL, skb, mtu);
}
+static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+ struct dst_entry *encap_dst,
+ int headroom)
+{
+ u32 encap_mtu = dst_mtu(encap_dst);
+
+ if (skb->len > encap_mtu - headroom)
+ skb_dst_update_pmtu(skb, encap_mtu - headroom);
+}
+
#endif /* _NET_DST_H */
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 482a1b705362..c8e2bebd8d93 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
* 1 if something is broken and should be logged (!!! above)
* 2 if packet should be dropped
*/
-static inline int INET_ECN_decapsulate(struct sk_buff *skb,
- __u8 outer, __u8 inner)
+static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
{
if (INET_ECN_is_not_ect(inner)) {
switch (outer & INET_ECN_MASK) {
@@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
}
}
- if (INET_ECN_is_ce(outer))
+ *set_ce = INET_ECN_is_ce(outer);
+ return 0;
+}
+
+static inline int INET_ECN_decapsulate(struct sk_buff *skb,
+ __u8 outer, __u8 inner)
+{
+ bool set_ce = false;
+ int rc;
+
+ rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
+ if (!rc && set_ce)
INET_ECN_set_ce(skb);
- return 0;
+ return rc;
}
static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index caabfd84a098..84097010237c 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -159,6 +159,10 @@ struct fib6_info {
struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ unsigned long last_probe;
+#endif
+
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 0e355f4a3d76..77e2761d4f2f 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -99,7 +99,7 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void (*iter)(struct flow_offload *flow, void *data),
void *data);
-void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+void nf_flow_table_cleanup(struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/include/net/netfilter/nfnetlink_log.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 86f034b524d4..8dadc74c22e7 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -148,11 +148,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive)
#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \
a->chunk_hdr->type == SCTP_CID_I_DATA)
-/* Calculate the actual data size in a data chunk */
-#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \
- (unsigned long)(c->chunk_hdr) - \
- sctp_datachk_len(&c->asoc->stream)))
-
/* Internal error codes */
enum sctp_ierror {
SCTP_IERROR_NO_ERROR = 0,
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 5ef1bad81ef5..9e3d32746430 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
__u16 size;
size = ntohs(chunk->chunk_hdr->length);
- size -= sctp_datahdr_len(&chunk->asoc->stream);
+ size -= sctp_datachk_len(&chunk->asoc->stream);
return size;
}
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 28a7c8e44636..a11f93790476 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -876,6 +876,8 @@ struct sctp_transport {
unsigned long sackdelay;
__u32 sackfreq;
+ atomic_t mtu_info;
+
/* When was the last time that we heard from this transport? We use
* this to pick new active and retran paths.
*/
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d574ce63bf22..881ecb1555bf 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -145,6 +145,10 @@ enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_DEVICE,
SWITCHDEV_FDB_DEL_TO_DEVICE,
SWITCHDEV_FDB_OFFLOADED,
+
+ SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
+ SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
+ SWITCHDEV_VXLAN_FDB_OFFLOADED,
};
struct switchdev_notifier_info {
@@ -155,7 +159,8 @@ struct switchdev_notifier_fdb_info {
struct switchdev_notifier_info info; /* must be first */
const unsigned char *addr;
u16 vid;
- bool added_by_user;
+ u8 added_by_user:1,
+ offloaded:1;
};
static inline struct net_device *
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 7ef15179f263..03431c148e16 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -5,6 +5,8 @@
#include <linux/if_vlan.h>
#include <net/udp_tunnel.h>
#include <net/dst_metadata.h>
+#include <net/rtnetlink.h>
+#include <net/switchdev.h>
/* VXLAN protocol (RFC 7348) header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -190,6 +192,7 @@ union vxlan_addr {
struct vxlan_rdst {
union vxlan_addr remote_ip;
__be16 remote_port;
+ u8 offloaded:1;
__be32 remote_vni;
u32 remote_ifindex;
struct list_head list;
@@ -370,4 +373,65 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
return vs->sock->sk->sk_family;
}
+#if IS_ENABLED(CONFIG_IPV6)
+
+static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
+{
+ if (ipa->sa.sa_family == AF_INET6)
+ return ipv6_addr_any(&ipa->sin6.sin6_addr);
+ else
+ return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+}
+
+static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
+{
+ if (ipa->sa.sa_family == AF_INET6)
+ return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
+ else
+ return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+}
+
+#else /* !IS_ENABLED(CONFIG_IPV6) */
+
+static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
+{
+ return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+}
+
+static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
+{
+ return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+}
+
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+static inline bool netif_is_vxlan(const struct net_device *dev)
+{
+ return dev->rtnl_link_ops &&
+ !strcmp(dev->rtnl_link_ops->kind, "vxlan");
+}
+
+struct switchdev_notifier_vxlan_fdb_info {
+ struct switchdev_notifier_info info; /* must be first */
+ union vxlan_addr remote_ip;
+ __be16 remote_port;
+ __be32 remote_vni;
+ u32 remote_ifindex;
+ u8 eth_addr[ETH_ALEN];
+ __be32 vni;
+ bool offloaded;
+};
+
+#if IS_ENABLED(CONFIG_VXLAN)
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info);
+#else
+static inline int
+vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ return -ENOENT;
+}
+#endif
+
#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 5444e76870bb..579974b0bf0d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1511,9 +1511,16 @@ enum nft_flowtable_hook_attributes {
};
#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1)
+/**
+ * enum nft_osf_attributes - nftables osf expression netlink attributes
+ *
+ * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
+ */
enum nft_osf_attributes {
NFTA_OSF_UNSPEC,
NFTA_OSF_DREG,
+ NFTA_OSF_TTL,
__NFTA_OSF_MAX,
};
#define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_quota.h b/include/uapi/linux/netfilter/xt_quota.h
index d72fd52adbba..f3ba5d9e58b6 100644
--- a/include/uapi/linux/netfilter/xt_quota.h
+++ b/include/uapi/linux/netfilter/xt_quota.h
@@ -15,11 +15,9 @@ struct xt_quota_info {
__u32 flags;
__u32 pad;
__aligned_u64 quota;
-#ifdef __KERNEL__
- atomic64_t counter;
-#else
- __aligned_u64 remain;
-#endif
+
+ /* Used internally by the kernel */
+ struct xt_quota_priv *master;
};
#endif /* _XT_QUOTA_H */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index b479db5c71d9..34dd3d497f2c 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -301,6 +301,7 @@ enum sctp_sinfo_flags {
SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */
/* 2 bits here have been used by SCTP_PR_SCTP_MASK */
SCTP_SENDALL = (1 << 6),
+ SCTP_PR_SCTP_ALL = (1 << 7),
SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */
SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */
};
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index ef0b7b6ef8a5..686d244e798d 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
sock_hold(sock->sk);
old_xs = xchg(&m->xsk_map[i], xs);
- if (old_xs) {
- /* Make sure we've flushed everything. */
- synchronize_net();
+ if (old_xs)
sock_put((struct sock *)old_xs);
- }
sockfd_put(sock);
return 0;
@@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
old_xs = xchg(&m->xsk_map[k], NULL);
- if (old_xs) {
- /* Make sure we've flushed everything. */
- synchronize_net();
+ if (old_xs)
sock_put((struct sock *)old_xs);
- }
return 0;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7fc4a371bdd2..908c9cdae2f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4001,7 +4001,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* put back on, and if we advance min_vruntime, we'll be placed back
* further than we started -- ie. we'll be penalized.
*/
- if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
update_min_vruntime(cfs_rq);
}
@@ -4476,9 +4476,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
/*
* Add to the _head_ of the list, so that an already-started
- * distribute_cfs_runtime will not see us
+ * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is
+ * not running add to the tail so that later runqueues don't get starved.
*/
- list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+ if (cfs_b->distribute_running)
+ list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+ else
+ list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
/*
* If we're the first throttled task, make sure the bandwidth
@@ -4622,14 +4626,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
* in us over-using our runtime if it is all used during this loop, but
* only by limited amounts in that extreme case.
*/
- while (throttled && cfs_b->runtime > 0) {
+ while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
runtime = cfs_b->runtime;
+ cfs_b->distribute_running = 1;
raw_spin_unlock(&cfs_b->lock);
/* we can't nest cfs_b->lock while distributing bandwidth */
runtime = distribute_cfs_runtime(cfs_b, runtime,
runtime_expires);
raw_spin_lock(&cfs_b->lock);
+ cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
cfs_b->runtime -= min(runtime, cfs_b->runtime);
@@ -4740,6 +4746,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
/* confirm we're still not at a refresh boundary */
raw_spin_lock(&cfs_b->lock);
+ if (cfs_b->distribute_running) {
+ raw_spin_unlock(&cfs_b->lock);
+ return;
+ }
+
if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
raw_spin_unlock(&cfs_b->lock);
return;
@@ -4749,6 +4760,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
runtime = cfs_b->runtime;
expires = cfs_b->runtime_expires;
+ if (runtime)
+ cfs_b->distribute_running = 1;
+
raw_spin_unlock(&cfs_b->lock);
if (!runtime)
@@ -4759,6 +4773,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
raw_spin_lock(&cfs_b->lock);
if (expires == cfs_b->runtime_expires)
cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
@@ -4867,6 +4882,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
cfs_b->period_timer.function = sched_cfs_period_timer;
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
+ cfs_b->distribute_running = 0;
}
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 455fa330de04..9683f458aec7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -346,6 +346,8 @@ struct cfs_bandwidth {
int nr_periods;
int nr_throttled;
u64 throttled_time;
+
+ bool distribute_running;
#endif
};
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index f704390db9fc..d8765c952fab 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -5,12 +5,12 @@
* Copyright (C) 2018 Joel Fernandes (Google) <joel@joelfernandes.org>
*/
+#include <linux/trace_clock.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
-#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/string.h>
@@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir
static void busy_wait(ulong time)
{
- ktime_t start, end;
- start = ktime_get();
+ u64 start, end;
+ start = trace_clock_local();
do {
- end = ktime_get();
+ end = trace_clock_local();
if (kthread_should_stop())
break;
- } while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000));
+ } while ((end - start) < (time * 1000));
}
static int preemptirq_delay_run(void *data)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 85f6b01431c7..d239004aaf29 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -738,16 +738,30 @@ static void free_synth_field(struct synth_field *field)
kfree(field);
}
-static struct synth_field *parse_synth_field(char *field_type,
- char *field_name)
+static struct synth_field *parse_synth_field(int argc, char **argv,
+ int *consumed)
{
struct synth_field *field;
+ const char *prefix = NULL;
+ char *field_type = argv[0], *field_name;
int len, ret = 0;
char *array;
if (field_type[0] == ';')
field_type++;
+ if (!strcmp(field_type, "unsigned")) {
+ if (argc < 3)
+ return ERR_PTR(-EINVAL);
+ prefix = "unsigned ";
+ field_type = argv[1];
+ field_name = argv[2];
+ *consumed = 3;
+ } else {
+ field_name = argv[1];
+ *consumed = 2;
+ }
+
len = strlen(field_name);
if (field_name[len - 1] == ';')
field_name[len - 1] = '\0';
@@ -760,11 +774,15 @@ static struct synth_field *parse_synth_field(char *field_type,
array = strchr(field_name, '[');
if (array)
len += strlen(array);
+ if (prefix)
+ len += strlen(prefix);
field->type = kzalloc(len, GFP_KERNEL);
if (!field->type) {
ret = -ENOMEM;
goto free;
}
+ if (prefix)
+ strcat(field->type, prefix);
strcat(field->type, field_type);
if (array) {
strcat(field->type, array);
@@ -1009,7 +1027,7 @@ static int create_synth_event(int argc, char **argv)
struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
struct synth_event *event = NULL;
bool delete_event = false;
- int i, n_fields = 0, ret = 0;
+ int i, consumed = 0, n_fields = 0, ret = 0;
char *name;
mutex_lock(&synth_event_mutex);
@@ -1061,16 +1079,16 @@ static int create_synth_event(int argc, char **argv)
goto err;
}
- field = parse_synth_field(argv[i], argv[i + 1]);
+ field = parse_synth_field(argc - i, &argv[i], &consumed);
if (IS_ERR(field)) {
ret = PTR_ERR(field);
goto err;
}
- fields[n_fields] = field;
- i++; n_fields++;
+ fields[n_fields++] = field;
+ i += consumed - 1;
}
- if (i < argc) {
+ if (i < argc && strcmp(argv[i], ";") != 0) {
ret = -EINVAL;
goto err;
}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index bf2c06ef9afc..a3be42304485 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -28,8 +28,8 @@
#include <linux/sched/task.h>
#include <linux/static_key.h>
-extern struct tracepoint * const __start___tracepoints_ptrs[];
-extern struct tracepoint * const __stop___tracepoints_ptrs[];
+extern tracepoint_ptr_t __start___tracepoints_ptrs[];
+extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
DEFINE_SRCU(tracepoint_srcu);
EXPORT_SYMBOL_GPL(tracepoint_srcu);
@@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
- struct tracepoint * const *end,
+static void for_each_tracepoint_range(
+ tracepoint_ptr_t *begin, tracepoint_ptr_t *end,
void (*fct)(struct tracepoint *tp, void *priv),
void *priv)
{
+ tracepoint_ptr_t *iter;
+
if (!begin)
return;
-
- if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
- const int *iter;
-
- for (iter = (const int *)begin; iter < (const int *)end; iter++)
- fct(offset_to_ptr(iter), priv);
- } else {
- struct tracepoint * const *iter;
-
- for (iter = begin; iter < end; iter++)
- fct(*iter, priv);
- }
+ for (iter = begin; iter < end; iter++)
+ fct(tracepoint_ptr_deref(iter), priv);
}
#ifdef CONFIG_MODULES
diff --git a/lib/test_ida.c b/lib/test_ida.c
index 2d1637d8136b..b06880625961 100644
--- a/lib/test_ida.c
+++ b/lib/test_ida.c
@@ -150,10 +150,10 @@ static void ida_check_conv(struct ida *ida)
IDA_BUG_ON(ida, !ida_is_empty(ida));
}
+static DEFINE_IDA(ida);
+
static int ida_checks(void)
{
- DEFINE_IDA(ida);
-
IDA_BUG_ON(&ida, !ida_is_empty(&ida));
ida_check_alloc(&ida);
ida_check_destroy(&ida);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 00704060b7f7..deed97fba979 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+ pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
@@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
- if (pmd_present(pmd) && pmd_dirty(pmd))
+ if (pmd_present(pmd))
force_flush = true;
VM_BUG_ON(!pmd_none(*new_pmd));
@@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
}
pmd = move_soft_dirty_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
- if (new_ptl != old_ptl)
- spin_unlock(new_ptl);
if (force_flush)
flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
- else
- *need_flush = true;
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
spin_unlock(old_ptl);
return true;
}
@@ -2885,9 +2883,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
if (!(pvmw->pmd && !pvmw->pte))
return;
- mmu_notifier_invalidate_range_start(mm, address,
- address + HPAGE_PMD_SIZE);
-
flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
pmdval = *pvmw->pmd;
pmdp_invalidate(vma, address, pvmw->pmd);
@@ -2900,9 +2895,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
page_remove_rmap(page, true);
put_page(page);
-
- mmu_notifier_invalidate_range_end(mm, address,
- address + HPAGE_PMD_SIZE);
}
void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
diff --git a/mm/mmap.c b/mm/mmap.c
index 5f2b2b184c60..f7cd9cb966c0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (flags & MAP_FIXED_NOREPLACE) {
struct vm_area_struct *vma = find_vma(mm, addr);
- if (vma && vma->vm_start <= addr)
+ if (vma && vma->vm_start < addr + len)
return -EEXIST;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..a9617e72e6b7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
unsigned long old_addr, unsigned long old_end,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
- unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
+ unsigned long new_addr, bool need_rmap_locks)
{
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
@@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
pte = ptep_get_and_clear(mm, old_addr, old_pte);
/*
- * If we are remapping a dirty PTE, make sure
+ * If we are remapping a valid PTE, make sure
* to flush TLB before we drop the PTL for the
- * old PTE or we may race with page_mkclean().
+ * PTE.
*
- * This check has to be done after we removed the
- * old PTE from page tables or another thread may
- * dirty it after the check and before the removal.
+ * NOTE! Both old and new PTL matter: the old one
+ * for racing with page_mkclean(), the new one to
+ * make sure the physical page stays valid until
+ * the TLB entry for the old mapping has been
+ * flushed.
*/
- if (pte_present(pte) && pte_dirty(pte))
+ if (pte_present(pte))
force_flush = true;
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
@@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
}
arch_leave_lazy_mmu_mode();
+ if (force_flush)
+ flush_tlb_range(vma, old_end - len, old_end);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
- if (force_flush)
- flush_tlb_range(vma, old_end - len, old_end);
- else
- *need_flush = true;
pte_unmap_unlock(old_pte - 1, old_ptl);
if (need_rmap_locks)
drop_rmap_locks(vma);
@@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
{
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
- bool need_flush = false;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_huge_pmd(vma, old_addr, new_addr,
- old_end, old_pmd, new_pmd,
- &need_flush);
+ old_end, old_pmd, new_pmd);
if (need_rmap_locks)
drop_rmap_locks(vma);
if (moved)
@@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (extent > next - new_addr)
extent = next - new_addr;
move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
- new_pmd, new_addr, need_rmap_locks, &need_flush);
+ new_pmd, new_addr, need_rmap_locks);
}
- if (need_flush)
- flush_tlb_range(vma, old_end-len, old_addr);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 546af0e73ac3..ff720f1ebf73 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -756,8 +756,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
return;
vlan->netpoll = NULL;
-
- __netpoll_free_async(netpoll);
+ __netpoll_free(netpoll);
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f47f8fad757a..ef9928d7b4fb 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4937,31 +4937,27 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
hci_debugfs_create_conn(conn);
hci_conn_add_sysfs(conn);
- if (!status) {
- /* The remote features procedure is defined for master
- * role only. So only in case of an initiated connection
- * request the remote features.
- *
- * If the local controller supports slave-initiated features
- * exchange, then requesting the remote features in slave
- * role is possible. Otherwise just transition into the
- * connected state without requesting the remote features.
- */
- if (conn->out ||
- (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
- struct hci_cp_le_read_remote_features cp;
+ /* The remote features procedure is defined for master
+ * role only. So only in case of an initiated connection
+ * request the remote features.
+ *
+ * If the local controller supports slave-initiated features
+ * exchange, then requesting the remote features in slave
+ * role is possible. Otherwise just transition into the
+ * connected state without requesting the remote features.
+ */
+ if (conn->out ||
+ (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
+ struct hci_cp_le_read_remote_features cp;
- cp.handle = __cpu_to_le16(conn->handle);
+ cp.handle = __cpu_to_le16(conn->handle);
- hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES,
- sizeof(cp), &cp);
+ hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES,
+ sizeof(cp), &cp);
- hci_conn_hold(conn);
- } else {
- conn->state = BT_CONNECTED;
- hci_connect_cfm(conn, status);
- }
+ hci_conn_hold(conn);
} else {
+ conn->state = BT_CONNECTED;
hci_connect_cfm(conn, status);
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 514899f7f0d4..2146e0f3b6f8 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -680,9 +680,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan)
u16 result;
if (test_bit(FLAG_DEFER_SETUP, &chan->flags))
- result = L2CAP_CR_AUTHORIZATION;
+ result = L2CAP_CR_LE_AUTHORIZATION;
else
- result = L2CAP_CR_BAD_PSM;
+ result = L2CAP_CR_LE_BAD_PSM;
l2cap_state_change(chan, BT_DISCONN);
@@ -3670,7 +3670,7 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan)
rsp.mtu = cpu_to_le16(chan->imtu);
rsp.mps = cpu_to_le16(chan->mps);
rsp.credits = cpu_to_le16(chan->rx_credits);
- rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+ rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS);
l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp),
&rsp);
@@ -3816,9 +3816,17 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
result = L2CAP_CR_NO_MEM;
+ /* Check for valid dynamic CID range (as per Erratum 3253) */
+ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_DYN_END) {
+ result = L2CAP_CR_INVALID_SCID;
+ goto response;
+ }
+
/* Check if we already have channel with that dcid */
- if (__l2cap_get_chan_by_dcid(conn, scid))
+ if (__l2cap_get_chan_by_dcid(conn, scid)) {
+ result = L2CAP_CR_SCID_IN_USE;
goto response;
+ }
chan = pchan->ops->new_connection(pchan);
if (!chan)
@@ -5280,7 +5288,7 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
credits = __le16_to_cpu(rsp->credits);
result = __le16_to_cpu(rsp->result);
- if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 ||
+ if (result == L2CAP_CR_LE_SUCCESS && (mtu < 23 || mps < 23 ||
dcid < L2CAP_CID_DYN_START ||
dcid > L2CAP_CID_LE_DYN_END))
return -EPROTO;
@@ -5301,7 +5309,7 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
l2cap_chan_lock(chan);
switch (result) {
- case L2CAP_CR_SUCCESS:
+ case L2CAP_CR_LE_SUCCESS:
if (__l2cap_get_chan_by_dcid(conn, dcid)) {
err = -EBADSLT;
break;
@@ -5315,8 +5323,8 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
l2cap_chan_ready(chan);
break;
- case L2CAP_CR_AUTHENTICATION:
- case L2CAP_CR_ENCRYPTION:
+ case L2CAP_CR_LE_AUTHENTICATION:
+ case L2CAP_CR_LE_ENCRYPTION:
/* If we already have MITM protection we can't do
* anything.
*/
@@ -5459,7 +5467,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
&conn->hcon->dst, LE_LINK);
if (!pchan) {
- result = L2CAP_CR_BAD_PSM;
+ result = L2CAP_CR_LE_BAD_PSM;
chan = NULL;
goto response;
}
@@ -5469,28 +5477,28 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
SMP_ALLOW_STK)) {
- result = L2CAP_CR_AUTHENTICATION;
+ result = L2CAP_CR_LE_AUTHENTICATION;
chan = NULL;
goto response_unlock;
}
/* Check for valid dynamic CID range */
if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) {
- result = L2CAP_CR_INVALID_SCID;
+ result = L2CAP_CR_LE_INVALID_SCID;
chan = NULL;
goto response_unlock;
}
/* Check if we already have channel with that dcid */
if (__l2cap_get_chan_by_dcid(conn, scid)) {
- result = L2CAP_CR_SCID_IN_USE;
+ result = L2CAP_CR_LE_SCID_IN_USE;
chan = NULL;
goto response_unlock;
}
chan = pchan->ops->new_connection(pchan);
if (!chan) {
- result = L2CAP_CR_NO_MEM;
+ result = L2CAP_CR_LE_NO_MEM;
goto response_unlock;
}
@@ -5526,7 +5534,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
chan->ops->defer(chan);
} else {
l2cap_chan_ready(chan);
- result = L2CAP_CR_SUCCESS;
+ result = L2CAP_CR_LE_SUCCESS;
}
response_unlock:
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index b64e1649993b..94e88f510c5b 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -23,9 +23,11 @@ static void shutdown_umh(struct umh_info *info)
if (!info->pid)
return;
- tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
- if (tsk)
+ tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID);
+ if (tsk) {
force_sig(SIGKILL, tsk);
+ put_task_struct(tsk);
+ }
fput(info->pipe_to_umh);
fput(info->pipe_from_umh);
info->pid = 0;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index e411e40333e2..360ad66c21e9 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -151,7 +151,7 @@ static int br_switchdev_event(struct notifier_block *unused,
break;
}
br_fdb_offloaded_set(br, p, fdb_info->addr,
- fdb_info->vid);
+ fdb_info->vid, true);
break;
case SWITCHDEV_FDB_DEL_TO_BRIDGE:
fdb_info = ptr;
@@ -163,7 +163,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_OFFLOADED:
fdb_info = ptr;
br_fdb_offloaded_set(br, p, fdb_info->addr,
- fdb_info->vid);
+ fdb_info->vid, fdb_info->offloaded);
break;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e053a4e43758..c6abf927f0c9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -344,7 +344,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
p->np = NULL;
- __netpoll_free_async(np);
+ __netpoll_free(np);
}
#endif
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 74331690a390..e56ba3912a90 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1152,7 +1152,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
}
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid, bool offloaded)
{
struct net_bridge_fdb_entry *fdb;
@@ -1160,7 +1160,7 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
fdb = br_fdb_find(br, addr, vid);
if (fdb)
- fdb->offloaded = 1;
+ fdb->offloaded = offloaded;
spin_unlock_bh(&br->hash_lock);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 10ee39fdca5c..2920e06a5403 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -574,7 +574,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid,
bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid);
+ const unsigned char *addr, u16 vid, bool offloaded);
/* br_forward.c */
enum br_pkt_type {
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index d77f807420c4..b993df770675 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -103,7 +103,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
static void
br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
u16 vid, struct net_device *dev,
- bool added_by_user)
+ bool added_by_user, bool offloaded)
{
struct switchdev_notifier_fdb_info info;
unsigned long notifier_type;
@@ -111,6 +111,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
info.addr = mac;
info.vid = vid;
info.added_by_user = added_by_user;
+ info.offloaded = offloaded;
notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
call_switchdev_notifiers(notifier_type, dev, &info.info);
}
@@ -126,13 +127,15 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
- fdb->added_by_user);
+ fdb->added_by_user,
+ fdb->offloaded);
break;
case RTM_NEWNEIGH:
br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
- fdb->added_by_user);
+ fdb->added_by_user,
+ fdb->offloaded);
break;
}
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4cc603dfc9ef..d05402868575 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -928,6 +928,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
return -EINVAL;
}
+ if (info.cmd != cmd)
+ return -EINVAL;
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -2392,13 +2395,17 @@ roll_back:
return ret;
}
-static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_per_queue(struct net_device *dev,
+ void __user *useraddr, u32 sub_cmd)
{
struct ethtool_per_queue_op per_queue_opt;
if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
return -EFAULT;
+ if (per_queue_opt.sub_command != sub_cmd)
+ return -EINVAL;
+
switch (per_queue_opt.sub_command) {
case ETHTOOL_GCOALESCE:
return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
@@ -2769,7 +2776,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_get_phy_stats(dev, useraddr);
break;
case ETHTOOL_PERQUEUE:
- rc = ethtool_set_per_queue(dev, useraddr);
+ rc = ethtool_set_per_queue(dev, useraddr, sub_cmd);
break;
case ETHTOOL_GLINKSETTINGS:
rc = ethtool_get_link_ksettings(dev, useraddr);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 69c41cb3966d..ee605d9d8bd4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1167,8 +1167,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh->nud_state = new;
err = 0;
notify = old & NUD_VALID;
- if (((old & (NUD_INCOMPLETE | NUD_PROBE)) ||
- (flags & NEIGH_UPDATE_F_ADMIN)) &&
+ if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
(new & NUD_FAILED)) {
neigh_invalidate(neigh);
notify = 1;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1d1ba92f2d..5da9552b186b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -57,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu);
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void netpoll_async_cleanup(struct work_struct *work);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
@@ -312,7 +311,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
- rcu_read_lock_bh();
lockdep_assert_irqs_disabled();
npinfo = rcu_dereference_bh(np->dev->npinfo);
@@ -357,7 +355,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
skb_queue_tail(&npinfo->txq, skb);
schedule_delayed_work(&npinfo->tx_work,0);
}
- rcu_read_unlock_bh();
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
@@ -591,7 +588,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
np->dev = ndev;
strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
- INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
@@ -790,10 +786,6 @@ void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
- /* rtnl_dereference would be preferable here but
- * rcu_cleanup_netpoll path can put us in here safely without
- * holding the rtnl, so plain rcu_dereference it is
- */
npinfo = rtnl_dereference(np->dev->npinfo);
if (!npinfo)
return;
@@ -814,21 +806,16 @@ void __netpoll_cleanup(struct netpoll *np)
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
-static void netpoll_async_cleanup(struct work_struct *work)
+void __netpoll_free(struct netpoll *np)
{
- struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
+ ASSERT_RTNL();
- rtnl_lock();
+ /* Wait for transmitting packets to finish before freeing. */
+ synchronize_rcu_bh();
__netpoll_cleanup(np);
- rtnl_unlock();
kfree(np);
}
-
-void __netpoll_free_async(struct netpoll *np)
-{
- schedule_work(&np->cleanup_work);
-}
-EXPORT_SYMBOL_GPL(__netpoll_free_async);
+EXPORT_SYMBOL_GPL(__netpoll_free);
void netpoll_cleanup(struct netpoll *np)
{
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 54b961de9538..946de0e24c87 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1846,8 +1846,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
if (skb->ip_summed == CHECKSUM_COMPLETE) {
int delta = skb->len - len;
- skb->csum = csum_sub(skb->csum,
- skb_checksum(skb, len, delta, 0));
+ skb->csum = csum_block_sub(skb->csum,
+ skb_checksum(skb, len, delta, 0),
+ len);
}
return __pskb_trim(skb, len);
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3f840b6eea69..7d0c19e7edcf 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -722,7 +722,7 @@ static void dsa_slave_netpoll_cleanup(struct net_device *dev)
p->netpoll = NULL;
- __netpoll_free_async(netpoll);
+ __netpoll_free(netpoll);
}
static void dsa_slave_poll_controller(struct net_device *dev)
@@ -1478,6 +1478,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
netdev_dbg(dev, "fdb add failed err=%d\n", err);
break;
}
+ fdb_info->offloaded = true;
call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
&fdb_info->info);
break;
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 844806120f44..3e614cc824f7 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -315,8 +315,6 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
next_entry:
e++;
}
- e = 0;
- s_e = 0;
spin_lock_bh(lock);
list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
index ac110c1d55b5..a0aa13bcabda 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -60,6 +60,7 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
MODULE_ALIAS("ip_nat_snmp_basic");
+MODULE_ALIAS_NFCT_HELPER("snmp_trap");
#define SNMP_PORT 161
#define SNMP_TRAP_PORT 162
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b88081285fd1..9277abdd822a 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
return cwnd;
}
+/* With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * we often have several skbs queued in the pacing layer with a pre-scheduled
+ * earliest departure time (EDT). BBR adapts its pacing rate based on the
+ * inflight level that it estimates has already been "baked in" by previous
+ * departure time decisions. We calculate a rough estimate of the number of our
+ * packets that might be in the network at the earliest departure time for the
+ * next skb scheduled:
+ * in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, then we want to know if the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * then estimate if inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 now_ns, edt_ns, interval_us;
+ u32 interval_delivered, inflight_at_edt;
+
+ now_ns = tp->tcp_clock_cache;
+ edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+ interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+ interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
+ inflight_at_edt = inflight_now;
+ if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */
+ inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */
+ if (interval_delivered >= inflight_at_edt)
+ return 0;
+ return inflight_at_edt - interval_delivered;
+}
+
/* An optimization in BBR to reduce losses: On the first round of recovery, we
* follow the packet conservation principle: send P packets per P packets acked.
* After that, we slow-start and send at most 2*P packets per P packets acked.
@@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
if (bbr->pacing_gain == BBR_UNIT)
return is_full_length; /* just use wall clock time */
- inflight = rs->prior_in_flight; /* what was in-flight before ACK? */
+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
bw = bbr_max_bw(sk);
/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
@@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk)
bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
bbr->cycle_mstamp = tp->delivered_mstamp;
- bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
- bbr_pacing_gain[bbr->cycle_idx];
}
/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -507,8 +538,6 @@ static void bbr_reset_startup_mode(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_STARTUP;
- bbr->pacing_gain = bbr_high_gain;
- bbr->cwnd_gain = bbr_high_gain;
}
static void bbr_reset_probe_bw_mode(struct sock *sk)
@@ -516,8 +545,6 @@ static void bbr_reset_probe_bw_mode(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_PROBE_BW;
- bbr->pacing_gain = BBR_UNIT;
- bbr->cwnd_gain = bbr_cwnd_gain;
bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */
}
@@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
bbr->mode = BBR_DRAIN; /* drain queue we created */
- bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
- bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
tcp_sk(sk)->snd_ssthresh =
bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
- tcp_packets_in_flight(tcp_sk(sk)) <=
+ bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */
}
@@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
!bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */
- bbr->pacing_gain = BBR_UNIT;
- bbr->cwnd_gain = BBR_UNIT;
bbr_save_cwnd(sk); /* note cwnd so we can restore it */
bbr->probe_rtt_done_stamp = 0;
}
@@ -827,6 +850,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
bbr->idle_restart = 0;
}
+static void bbr_update_gains(struct sock *sk)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ switch (bbr->mode) {
+ case BBR_STARTUP:
+ bbr->pacing_gain = bbr_high_gain;
+ bbr->cwnd_gain = bbr_high_gain;
+ break;
+ case BBR_DRAIN:
+ bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */
+ bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */
+ break;
+ case BBR_PROBE_BW:
+ bbr->pacing_gain = (bbr->lt_use_bw ?
+ BBR_UNIT :
+ bbr_pacing_gain[bbr->cycle_idx]);
+ bbr->cwnd_gain = bbr_cwnd_gain;
+ break;
+ case BBR_PROBE_RTT:
+ bbr->pacing_gain = BBR_UNIT;
+ bbr->cwnd_gain = BBR_UNIT;
+ break;
+ default:
+ WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
+ break;
+ }
+}
+
static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
bbr_update_bw(sk, rs);
@@ -834,6 +886,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
bbr_check_full_bw_reached(sk, rs);
bbr_check_drain(sk, rs);
bbr_update_min_rtt(sk, rs);
+ bbr_update_gains(sk);
}
static void bbr_main(struct sock *sk, const struct rate_sample *rs)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d212e4cbc689..c07990a35ff3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2321,18 +2321,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+ /* "skb_mstamp_ns" is used as a start point for the retransmit timer */
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
+ list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+ goto repair; /* Skip network transmission */
+ }
+
if (tcp_pacing_check(sk))
break;
tso_segs = tcp_init_tso_segs(skb, mss_now);
BUG_ON(!tso_segs);
- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
- /* "skb_mstamp" is used as a start point for the retransmit timer */
- tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns);
- goto repair; /* Skip network transmission */
- }
-
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
if (push_one == 2)
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index a9162aa11af9..95df7f7f6328 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -99,8 +99,10 @@ void tcp_cleanup_ulp(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- sock_owned_by_me(sk);
-
+ /* No sock_owned_by_me() check here as at the time the
+ * stack calls this function, the socket is dead and
+ * about to be destroyed.
+ */
if (!icsk->icsk_ulp_ops)
return;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2496b12bf721..e39c284e2954 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4972,12 +4972,14 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
/* unicast address incl. temp addr */
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- if (++ip_idx < s_ip_idx)
- continue;
+ if (ip_idx < s_ip_idx)
+ goto next;
err = inet6_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
break;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+next:
+ ip_idx++;
}
break;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a0b6932c3afd..a9d06d4dd057 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1184,11 +1184,6 @@ route_lookup:
}
skb_dst_set(skb, dst);
- if (encap_limit >= 0) {
- init_tel_txopt(&opt, encap_limit);
- ipv6_push_frag_opts(skb, &opt.ops, &proto);
- }
-
if (hop_limit == 0) {
if (skb->protocol == htons(ETH_P_IP))
hop_limit = ip_hdr(skb)->ttl;
@@ -1210,6 +1205,11 @@ route_lookup:
if (err)
return err;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_frag_opts(skb, &opt.ops, &proto);
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6895e1dc0b03..21f6deb2aec9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
- /* callers have the socket lock and rtnl lock
- * so no other readers or writers of iml or its sflist
- */
+ write_lock_bh(&iml->sflock);
if (!iml->sflist) {
/* any-source empty exclude case */
- return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ } else {
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+ iml->sflist->sl_count, iml->sflist->sl_addr, 0);
+ sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
+ iml->sflist = NULL;
}
- err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
- iml->sflist->sl_count, iml->sflist->sl_addr, 0);
- sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
- iml->sflist = NULL;
+ write_unlock_bh(&iml->sflock);
return err;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9fd600e42f9d..e3226284e480 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -517,10 +517,11 @@ static void rt6_probe_deferred(struct work_struct *w)
static void rt6_probe(struct fib6_info *rt)
{
- struct __rt6_probe_work *work;
+ struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
struct neighbour *neigh;
struct net_device *dev;
+ struct inet6_dev *idev;
/*
* Okay, this does not seem to be appropriate
@@ -536,15 +537,12 @@ static void rt6_probe(struct fib6_info *rt)
nh_gw = &rt->fib6_nh.nh_gw;
dev = rt->fib6_nh.nh_dev;
rcu_read_lock_bh();
+ idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
- struct inet6_dev *idev;
-
if (neigh->nud_state & NUD_VALID)
goto out;
- idev = __in6_dev_get(dev);
- work = NULL;
write_lock(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
@@ -554,11 +552,13 @@ static void rt6_probe(struct fib6_info *rt)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else {
+ } else if (time_after(jiffies, rt->last_probe +
+ idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
if (work) {
+ rt->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 374e7d302f26..06d17ff3562f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -766,11 +766,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
ret = udpv6_queue_rcv_skb(sk, skb);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
+ /* a return value > 0 means to resubmit the input */
if (ret > 0)
- return -ret;
+ return ret;
return 0;
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef3defaf43b9..d35bcf92969c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
- while (nh + offset + 1 < skb->data ||
- pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+ while (nh + offset + sizeof(*exthdr) < skb->data ||
+ pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
nh = skb_network_header(skb);
exthdr = (struct ipv6_opt_hdr *)(nh + offset);
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index c0ac522b48a1..4ff89cb7c86f 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk)
llc_sk(sk)->sap = sap;
spin_lock_bh(&sap->sk_lock);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sap->sk_count++;
sk_nulls_add_node_rcu(sk, laddr_hb);
hlist_add_head(&llc->dev_hash_node, dev_hb);
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
index 08a8a6031fd7..7f2b46108a24 100644
--- a/net/ncsi/Kconfig
+++ b/net/ncsi/Kconfig
@@ -10,3 +10,9 @@ config NET_NCSI
support. Enable this only if your system connects to a network
device via NCSI and the ethernet driver you're using supports
the protocol explicitly.
+config NCSI_OEM_CMD_GET_MAC
+ bool "Get NCSI OEM MAC Address"
+ depends on NET_NCSI
+ ---help---
+ This allows to get MAC address from NCSI firmware and set them back to
+ controller.
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 13c9b5eeb3b7..1dae77c54009 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -71,6 +71,13 @@ enum {
/* OEM Vendor Manufacture ID */
#define NCSI_OEM_MFR_MLX_ID 0x8119
#define NCSI_OEM_MFR_BCM_ID 0x113d
+/* Broadcom specific OEM Command */
+#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */
+/* OEM Command payload lengths*/
+#define NCSI_OEM_BCM_CMD_GMA_LEN 12
+/* Mac address offset in OEM response */
+#define BCM_MAC_ADDR_OFFSET 28
+
struct ncsi_channel_version {
u32 version; /* Supported BCD encoded NCSI version */
@@ -246,6 +253,7 @@ enum {
ncsi_dev_state_probe_dp,
ncsi_dev_state_config_sp = 0x0301,
ncsi_dev_state_config_cis,
+ ncsi_dev_state_config_oem_gma,
ncsi_dev_state_config_clear_vids,
ncsi_dev_state_config_svf,
ncsi_dev_state_config_ev,
@@ -279,6 +287,7 @@ struct ncsi_dev_priv {
#define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */
#define NCSI_DEV_HWA 2 /* Enabled HW arbitration */
#define NCSI_DEV_RESHUFFLE 4
+ unsigned int gma_flag; /* OEM GMA flag */
spinlock_t lock; /* Protect the NCSI device */
#if IS_ENABLED(CONFIG_IPV6)
unsigned int inet6_addr_num; /* Number of IPv6 addresses */
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 6aa0614d2d28..bfc43b28c7a6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -651,6 +651,72 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+
+/* NCSI OEM Command APIs */
+static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
+{
+ unsigned char data[NCSI_OEM_BCM_CMD_GMA_LEN];
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN;
+
+ memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN);
+ *(unsigned int *)data = ntohl(NCSI_OEM_MFR_BCM_ID);
+ data[5] = NCSI_OEM_BCM_CMD_GMA;
+
+ nca->data = data;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+ return ret;
+}
+
+/* OEM Command handlers initialization */
+static struct ncsi_oem_gma_handler {
+ unsigned int mfr_id;
+ int (*handler)(struct ncsi_cmd_arg *nca);
+} ncsi_oem_gma_handlers[] = {
+ { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm }
+};
+
+static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
+{
+ struct ncsi_oem_gma_handler *nch = NULL;
+ int i;
+
+ /* This function should only be called once, return if flag set */
+ if (nca->ndp->gma_flag == 1)
+ return -1;
+
+ /* Find gma handler for given manufacturer id */
+ for (i = 0; i < ARRAY_SIZE(ncsi_oem_gma_handlers); i++) {
+ if (ncsi_oem_gma_handlers[i].mfr_id == mf_id) {
+ if (ncsi_oem_gma_handlers[i].handler)
+ nch = &ncsi_oem_gma_handlers[i];
+ break;
+ }
+ }
+
+ if (!nch) {
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: No GMA handler available for MFR-ID (0x%x)\n",
+ mf_id);
+ return -1;
+ }
+
+ /* Set the flag for GMA command which should only be called once */
+ nca->ndp->gma_flag = 1;
+
+ /* Get Mac address from NCSI device */
+ return nch->handler(nca);
+}
+
+#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
+
static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -701,7 +767,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
goto error;
}
+ nd->state = ncsi_dev_state_config_oem_gma;
+ break;
+ case ncsi_dev_state_config_oem_gma:
nd->state = ncsi_dev_state_config_clear_vids;
+ ret = -1;
+
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+ nca.type = NCSI_PKT_CMD_OEM;
+ nca.package = np->id;
+ nca.channel = nc->id;
+ ndp->pending_req_num = 1;
+ ret = ncsi_gma_handler(&nca, nc->version.mf_id);
+#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
+
+ if (ret < 0)
+ schedule_work(&ndp->work);
+
break;
case ncsi_dev_state_config_clear_vids:
case ncsi_dev_state_config_svf:
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 0f2087c8d42a..4d3f06be38bd 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -165,6 +165,14 @@ struct ncsi_rsp_oem_pkt {
unsigned char data[]; /* Payload data */
};
+/* Broadcom Response Data */
+struct ncsi_rsp_oem_bcm_pkt {
+ unsigned char ver; /* Payload Version */
+ unsigned char type; /* OEM Command type */
+ __be16 len; /* Payload Length */
+ unsigned char data[]; /* Cmd specific Data */
+};
+
/* Get Link Status */
struct ncsi_rsp_gls_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 85fa59afae34..77e07ba3f493 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -611,19 +611,59 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
return 0;
}
+/* Response handler for Broadcom command Get Mac Address */
+static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ const struct net_device_ops *ops = ndev->netdev_ops;
+ struct ncsi_rsp_oem_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = 0;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
+ /* Increase mac address by 1 for BMC's address */
+ saddr.sa_data[ETH_ALEN - 1]++;
+ ret = ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0)
+ netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
+
+ return ret;
+}
+
+/* Response handler for Broadcom card */
+static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_oem_bcm_pkt *bcm;
+ struct ncsi_rsp_oem_pkt *rsp;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+ bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data);
+
+ if (bcm->type == NCSI_OEM_BCM_CMD_GMA)
+ return ncsi_rsp_handler_oem_bcm_gma(nr);
+ return 0;
+}
+
static struct ncsi_rsp_oem_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_request *nr);
} ncsi_rsp_oem_handlers[] = {
{ NCSI_OEM_MFR_MLX_ID, NULL },
- { NCSI_OEM_MFR_BCM_ID, NULL }
+ { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm }
};
/* Response handler for OEM command */
static int ncsi_rsp_handler_oem(struct ncsi_request *nr)
{
- struct ncsi_rsp_oem_pkt *rsp;
struct ncsi_rsp_oem_handler *nrh = NULL;
+ struct ncsi_rsp_oem_pkt *rsp;
unsigned int mfr_id, i;
/* Get the response header */
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 8a33dac4e805..e287da68d5fa 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -15,7 +15,7 @@
#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
#define ipset_dereference_protected(p, set) \
- __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
+ __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 185c633b6872..a3cc2ef8a48a 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -483,7 +483,7 @@ static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
flush_delayed_work(&flowtable->gc_work);
}
-void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+void nf_flow_table_cleanup(struct net_device *dev)
{
struct nf_flowtable *flowtable;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index b48545b84ce8..e7a50af1b3d6 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -350,7 +350,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
struct netlink_ext_ack *extack)
{
const struct nf_conntrack_l4proto *l4proto;
- __u16 l3num;
__u8 l4num;
int ret;
@@ -359,7 +358,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
!cda[CTA_TIMEOUT_DATA])
return -EINVAL;
- l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find_get(l4num);
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 00db27dfd2ff..6f41dd74729d 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -30,32 +30,27 @@ EXPORT_SYMBOL_GPL(nf_osf_fingers);
static inline int nf_osf_ttl(const struct sk_buff *skb,
int ttl_check, unsigned char f_ttl)
{
+ struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
const struct iphdr *ip = ip_hdr(skb);
-
- if (ttl_check != -1) {
- if (ttl_check == NF_OSF_TTL_TRUE)
- return ip->ttl == f_ttl;
- if (ttl_check == NF_OSF_TTL_NOCHECK)
- return 1;
- else if (ip->ttl <= f_ttl)
- return 1;
- else {
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
- int ret = 0;
-
- for_ifa(in_dev) {
- if (inet_ifa_match(ip->saddr, ifa)) {
- ret = (ip->ttl == f_ttl);
- break;
- }
- }
- endfor_ifa(in_dev);
-
- return ret;
+ int ret = 0;
+
+ if (ttl_check == NF_OSF_TTL_TRUE)
+ return ip->ttl == f_ttl;
+ if (ttl_check == NF_OSF_TTL_NOCHECK)
+ return 1;
+ else if (ip->ttl <= f_ttl)
+ return 1;
+
+ for_ifa(in_dev) {
+ if (inet_ifa_match(ip->saddr, ifa)) {
+ ret = (ip->ttl == f_ttl);
+ break;
}
}
- return ip->ttl == f_ttl;
+ endfor_ifa(in_dev);
+
+ return ret;
}
struct nf_osf_hdr_ctx {
@@ -213,7 +208,7 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
if (!tcp)
return false;
- ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
+ ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0;
list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
@@ -257,7 +252,8 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
EXPORT_SYMBOL_GPL(nf_osf_match);
const char *nf_osf_find(const struct sk_buff *skb,
- const struct list_head *nf_osf_fingers)
+ const struct list_head *nf_osf_fingers,
+ const int ttl_check)
{
const struct iphdr *ip = ip_hdr(skb);
const struct nf_osf_user_finger *f;
@@ -275,7 +271,7 @@ const char *nf_osf_find(const struct sk_buff *skb,
list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
f = &kf->finger;
- if (!nf_osf_match_one(skb, f, -1, &ctx))
+ if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
continue;
genre = f->genre;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index 2cc1e0ef56e8..15cc62b293d6 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -46,8 +46,6 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg_dev, sizeof(int));
}
-static const struct nft_expr_ops nft_dup_netdev_ingress_ops;
-
static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_dup_netdev *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index d6bab8c3cbb0..e82d9a966c45 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -201,7 +201,7 @@ static int flow_offload_netdev_event(struct notifier_block *this,
if (event != NETDEV_DOWN)
return NOTIFY_DONE;
- nf_flow_table_cleanup(dev_net(dev), dev);
+ nf_flow_table_cleanup(dev);
return NOTIFY_DONE;
}
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 8abb9891cdf2..d7694e7255a0 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -53,8 +53,6 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg_dev, sizeof(int));
}
-static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
-
static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
@@ -169,8 +167,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return nft_validate_register_load(priv->sreg_addr, addr_len);
}
-static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
-
static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_fwd_neigh *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index a35fb59ace73..0b452fd470c4 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -6,10 +6,12 @@
struct nft_osf {
enum nft_registers dreg:8;
+ u8 ttl;
};
static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
[NFTA_OSF_DREG] = { .type = NLA_U32 },
+ [NFTA_OSF_TTL] = { .type = NLA_U8 },
};
static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
@@ -33,7 +35,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- os_name = nf_osf_find(skb, nf_osf_fingers);
+ os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl);
if (!os_name)
strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
else
@@ -46,6 +48,14 @@ static int nft_osf_init(const struct nft_ctx *ctx,
{
struct nft_osf *priv = nft_expr_priv(expr);
int err;
+ u8 ttl;
+
+ if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+ ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
+ if (ttl > 2)
+ return -EINVAL;
+ priv->ttl = ttl;
+ }
priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
err = nft_validate_register_store(ctx, priv->dreg, NULL,
@@ -60,6 +70,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_osf *priv = nft_expr_priv(expr);
+ if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
+ goto nla_put_failure;
+
if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
goto nla_put_failure;
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 3cf71a2e375b..5322609f7662 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -118,12 +118,13 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
struct nft_regs *regs,
- const struct xfrm_state *state,
- u8 family)
+ const struct xfrm_state *state)
{
u32 *dest = &regs->data[priv->dreg];
- if (!xfrm_state_addr_ok(priv->key, family, state->props.mode)) {
+ if (!xfrm_state_addr_ok(priv->key,
+ state->props.family,
+ state->props.mode)) {
regs->verdict.code = NFT_BREAK;
return;
}
@@ -169,7 +170,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
}
state = sp->xvec[priv->spnum];
- nft_xfrm_state_get_key(priv, regs, state, nft_pf(pkt));
+ nft_xfrm_state_get_key(priv, regs, state);
}
static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
@@ -184,7 +185,7 @@ static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
if (i < priv->spnum)
continue;
- nft_xfrm_state_get_key(priv, regs, dst->xfrm, nft_pf(pkt));
+ nft_xfrm_state_get_key(priv, regs, dst->xfrm);
return;
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index bf7bba80e24c..7a103553d10d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -40,14 +40,8 @@
static bool
xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
{
- const struct xt_osf_info *info = p->matchinfo;
- struct net *net = xt_net(p);
-
- if (!info)
- return false;
-
return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
- xt_out(p), info, net, nf_osf_fingers);
+ xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers);
}
static struct xt_match xt_osf_match = {
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index fceae245eb03..10d61a6eed71 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,6 +11,11 @@
#include <linux/netfilter/xt_quota.h>
#include <linux/module.h>
+struct xt_quota_priv {
+ spinlock_t lock;
+ uint64_t quota;
+};
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
MODULE_DESCRIPTION("Xtables: countdown quota match");
@@ -21,48 +26,54 @@ static bool
quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct xt_quota_info *q = (void *)par->matchinfo;
- u64 current_count = atomic64_read(&q->counter);
+ struct xt_quota_priv *priv = q->master;
bool ret = q->flags & XT_QUOTA_INVERT;
- u64 old_count, new_count;
-
- do {
- if (current_count == 1)
- return ret;
- if (current_count <= skb->len) {
- atomic64_set(&q->counter, 1);
- return ret;
- }
- old_count = current_count;
- new_count = current_count - skb->len;
- current_count = atomic64_cmpxchg(&q->counter, old_count,
- new_count);
- } while (current_count != old_count);
- return !ret;
+
+ spin_lock_bh(&priv->lock);
+ if (priv->quota >= skb->len) {
+ priv->quota -= skb->len;
+ ret = !ret;
+ } else {
+ /* we do not allow even small packets from now on */
+ priv->quota = 0;
+ }
+ spin_unlock_bh(&priv->lock);
+
+ return ret;
}
static int quota_mt_check(const struct xt_mtchk_param *par)
{
struct xt_quota_info *q = par->matchinfo;
- BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64));
-
if (q->flags & ~XT_QUOTA_MASK)
return -EINVAL;
- if (atomic64_read(&q->counter) > q->quota + 1)
- return -ERANGE;
- if (atomic64_read(&q->counter) == 0)
- atomic64_set(&q->counter, q->quota + 1);
+ q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
+ if (q->master == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&q->master->lock);
+ q->master->quota = q->quota;
return 0;
}
+static void quota_mt_destroy(const struct xt_mtdtor_param *par)
+{
+ const struct xt_quota_info *q = par->matchinfo;
+
+ kfree(q->master);
+}
+
static struct xt_match quota_mt_reg __read_mostly = {
.name = "quota",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = quota_mt,
.checkentry = quota_mt_check,
+ .destroy = quota_mt_destroy,
.matchsize = sizeof(struct xt_quota_info),
+ .usersize = offsetof(struct xt_quota_info, master),
.me = THIS_MODULE,
};
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index e0d8ca03169a..44860505246d 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_connection *conn;
- struct rxrpc_peer *peer;
+ struct rxrpc_peer *peer = NULL;
struct rxrpc_call *call;
_enter("");
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index cad0691c2bb4..0906e51d3cfb 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -139,7 +139,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
udp_sk(usk)->gro_complete = NULL;
udp_encap_enable();
-#if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
if (local->srx.transport.family == AF_INET6)
udpv6_encap_enable();
#endif
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 0f0b499d1202..189418888839 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -572,7 +572,8 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
whdr.flags ^= RXRPC_CLIENT_INITIATED;
whdr.flags &= RXRPC_CLIENT_INITIATED;
- ret = kernel_sendmsg(local->socket, &msg, iov, 2, size);
+ ret = kernel_sendmsg(local->socket, &msg,
+ iov, ioc, size);
if (ret < 0)
trace_rxrpc_tx_fail(local->debug_id, 0, ret,
rxrpc_tx_point_reject);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 7feb611c7258..bc05af89fc38 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -197,6 +197,7 @@ void rxrpc_error_report(struct sock *sk)
rxrpc_store_error(peer, serr);
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+ rxrpc_put_peer(peer);
_leave("");
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 43c8559aca56..f427a1e00e7e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+
/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);
@@ -1304,7 +1306,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
replay:
tp_created = 0;
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
return err;
@@ -1454,7 +1456,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
return err;
@@ -1570,7 +1572,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
void *fh = NULL;
int err;
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
return err;
@@ -1937,7 +1939,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
return -EPERM;
replay:
- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
return err;
@@ -2055,7 +2057,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
+ err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
cb->extack);
if (err)
return err;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index cf5c714ae786..022bca98bde6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1318,10 +1318,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
return 0;
}
-/*
- * Delete/get qdisc.
- */
-
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
[TCA_KIND] = { .type = NLA_STRING },
[TCA_OPTIONS] = { .type = NLA_NESTED },
@@ -1334,6 +1330,10 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
[TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
};
+/*
+ * Delete/get qdisc.
+ */
+
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -2070,7 +2070,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
if (tcm->tcm_parent) {
q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
- if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+ if (q && q != root &&
+ tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
return -1;
return 0;
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 297d9cf960b9..a827a1f562bf 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1450,7 +1450,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
/* Get the lowest pmtu of all the transports. */
list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
if (t->pmtu_pending && t->dst) {
- sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst));
+ sctp_transport_update_pmtu(t,
+ atomic_read(&t->mtu_info));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 9bbc5f92c941..5c36a99882ed 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -395,6 +395,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
return;
if (sock_owned_by_user(sk)) {
+ atomic_set(&t->mtu_info, pmtu);
asoc->pmtu_pending = 1;
t->pmtu_pending = 1;
return;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 7f849b01ec8e..67939ad99c01 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -120,6 +120,12 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_assoc_sync_pmtu(asoc);
}
+ if (asoc->pmtu_pending) {
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
+ asoc->pmtu_pending = 0;
+ }
+
/* If there a is a prepend chunk stick it on the list before
* any other chunks get appended.
*/
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 42191ed9902b..9cb854b05342 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -385,9 +385,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
asoc->outqueue.outstanding_bytes -= sctp_data_size(chk);
}
- msg_len -= SCTP_DATA_SNDSIZE(chk) +
- sizeof(struct sk_buff) +
- sizeof(struct sctp_chunk);
+ msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
if (msg_len <= 0)
break;
}
@@ -421,9 +419,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
}
- msg_len -= SCTP_DATA_SNDSIZE(chk) +
- sizeof(struct sk_buff) +
- sizeof(struct sctp_chunk);
+ msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk);
sctp_chunk_free(chk);
if (msg_len <= 0)
break;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f73e9d38d5ba..fc0386e8ff23 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -83,7 +83,7 @@
#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
-static int sctp_writeable(struct sock *sk);
+static bool sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
size_t msg_len);
@@ -119,25 +119,10 @@ static void sctp_enter_memory_pressure(struct sock *sk)
/* Get the sndbuf space available at the time on the association. */
static inline int sctp_wspace(struct sctp_association *asoc)
{
- int amt;
+ struct sock *sk = asoc->base.sk;
- if (asoc->ep->sndbuf_policy)
- amt = asoc->sndbuf_used;
- else
- amt = sk_wmem_alloc_get(asoc->base.sk);
-
- if (amt >= asoc->base.sk->sk_sndbuf) {
- if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK)
- amt = 0;
- else {
- amt = sk_stream_wspace(asoc->base.sk);
- if (amt < 0)
- amt = 0;
- }
- } else {
- amt = asoc->base.sk->sk_sndbuf - amt;
- }
- return amt;
+ return asoc->ep->sndbuf_policy ? sk->sk_sndbuf - asoc->sndbuf_used
+ : sk_stream_wspace(sk);
}
/* Increment the used sndbuf space count of the corresponding association by
@@ -166,12 +151,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
/* Save the chunk pointer in skb for sctp_wfree to use later. */
skb_shinfo(chunk->skb)->destructor_arg = chunk;
- asoc->sndbuf_used += SCTP_DATA_SNDSIZE(chunk) +
- sizeof(struct sk_buff) +
- sizeof(struct sctp_chunk);
-
refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
- sk->sk_wmem_queued += chunk->skb->truesize;
+ asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
+ sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
sk_mem_charge(sk, chunk->skb->truesize);
}
@@ -271,11 +253,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
spin_lock_bh(&sctp_assocs_id_lock);
asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id);
+ if (asoc && (asoc->base.sk != sk || asoc->base.dead))
+ asoc = NULL;
spin_unlock_bh(&sctp_assocs_id_lock);
- if (!asoc || (asoc->base.sk != sk) || asoc->base.dead)
- return NULL;
-
return asoc;
}
@@ -1928,10 +1909,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
asoc->pmtu_pending = 0;
}
- if (sctp_wspace(asoc) < msg_len)
+ if (sctp_wspace(asoc) < (int)msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
- if (!sctp_wspace(asoc)) {
+ if (sctp_wspace(asoc) <= 0) {
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err)
@@ -1946,8 +1927,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
if (sp->strm_interleave) {
timeo = sock_sndtimeo(sk, 0);
err = sctp_wait_for_connect(asoc, &timeo);
- if (err)
+ if (err) {
+ err = -ESRCH;
goto err;
+ }
} else {
wait_connect = true;
}
@@ -7100,14 +7083,14 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (policy & ~SCTP_PR_SCTP_MASK)
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
if (!asoc)
goto out;
- if (policy == SCTP_PR_SCTP_NONE) {
+ if (policy & SCTP_PR_SCTP_ALL) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7159,7 +7142,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (policy & ~SCTP_PR_SCTP_MASK)
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
@@ -7175,7 +7158,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
goto out;
}
- if (policy == SCTP_PR_SCTP_NONE) {
+ if (policy == SCTP_PR_SCTP_ALL) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -8460,17 +8443,11 @@ static void sctp_wfree(struct sk_buff *skb)
struct sctp_association *asoc = chunk->asoc;
struct sock *sk = asoc->base.sk;
- asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk) +
- sizeof(struct sk_buff) +
- sizeof(struct sctp_chunk);
-
- WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc));
-
- /*
- * This undoes what is done via sctp_set_owner_w and sk_mem_charge
- */
- sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
+ sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+ asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
+ WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
+ &sk->sk_wmem_alloc));
if (chunk->shkey) {
struct sctp_shared_key *shkey = chunk->shkey;
@@ -8544,7 +8521,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
goto do_error;
if (signal_pending(current))
goto do_interrupted;
- if (msg_len <= sctp_wspace(asoc))
+ if ((int)msg_len <= sctp_wspace(asoc))
break;
/* Let another process have a go. Since we are going
@@ -8619,14 +8596,9 @@ void sctp_write_space(struct sock *sk)
* UDP-style sockets or TCP-style sockets, this code should work.
* - Daisy
*/
-static int sctp_writeable(struct sock *sk)
+static bool sctp_writeable(struct sock *sk)
{
- int amt = 0;
-
- amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amt < 0)
- amt = 0;
- return amt;
+ return sk->sk_sndbuf > sk->sk_wmem_queued;
}
/* Wait for an association to go into ESTABLISHED state. If timeout is 0,
diff --git a/net/socket.c b/net/socket.c
index 713dc4833d40..b68801c7d0ab 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2875,9 +2875,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
copy_in_user(&rxnfc->fs.ring_cookie,
&compat_rxnfc->fs.ring_cookie,
(void __user *)(&rxnfc->fs.location + 1) -
- (void __user *)&rxnfc->fs.ring_cookie) ||
- copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
+ (void __user *)&rxnfc->fs.ring_cookie))
+ return -EFAULT;
+ if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+ if (put_user(rule_cnt, &rxnfc->rule_cnt))
+ return -EFAULT;
+ } else if (copy_in_user(&rxnfc->rule_cnt,
+ &compat_rxnfc->rule_cnt,
+ sizeof(rxnfc->rule_cnt)))
return -EFAULT;
}
diff --git a/net/tipc/group.c b/net/tipc/group.c
index e82f13cb2dc5..06fee142f09f 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -666,6 +666,7 @@ static void tipc_group_create_event(struct tipc_group *grp,
struct sk_buff *skb;
struct tipc_msg *hdr;
+ memset(&evt, 0, sizeof(evt));
evt.event = event;
evt.found_lower = m->instance;
evt.found_upper = m->instance;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f6552e4f4b43..201c3b5bc96b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1041,6 +1041,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
if (r->last_retransm != buf_seqno(skb)) {
r->last_retransm = buf_seqno(skb);
r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
+ r->stale_cnt = 0;
} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
link_retransmit_failure(l, skb);
if (link_is_bc_sndlink(l))
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 51b4b96f89db..3cfeb9df64b0 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
struct sk_buff *buf;
struct distr_item *item;
- list_del(&publ->binding_node);
+ list_del_rcu(&publ->binding_node);
if (publ->scope == TIPC_NODE_SCOPE)
return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
- list_for_each_entry(publ, pls, binding_node) {
+ list_for_each_entry_rcu(publ, pls, binding_node) {
/* Prepare next buffer: */
if (!skb) {
skb = named_prepare_buf(net, PUBLICATION, msg_rem,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d1edfa3cad61..98d34fb61744 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -225,6 +225,8 @@ static inline void unix_release_addr(struct unix_address *addr)
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
+ *hashp = 0;
+
if (len <= sizeof(short) || len > sizeof(*sunaddr))
return -EINVAL;
if (!sunaddr || sunaddr->sun_family != AF_UNIX)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 0577cd49aa72..07156f43d295 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -754,6 +754,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_destruct = xsk_destruct;
sk_refcnt_debug_inc(sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
spin_lock_init(&xs->tx_completion_lock);
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 61be810389d8..ce66323102f9 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -13,7 +13,7 @@ static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr)
static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr)
{
- return ntohl(addr->a6[2] ^ addr->a6[3]);
+ return jhash2((__force u32 *)addr->a6, 4, 0);
}
static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr,
@@ -26,8 +26,7 @@ static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr,
static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr,
const xfrm_address_t *saddr)
{
- return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
- saddr->a6[2] ^ saddr->a6[3]);
+ return __xfrm6_addr_hash(daddr) ^ __xfrm6_addr_hash(saddr);
}
static inline u32 __bits2mask32(__u8 bits)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index be3520e429c9..684c0bc01e2c 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -131,7 +131,7 @@ struct sec_path *secpath_dup(struct sec_path *src)
sp->len = 0;
sp->olen = 0;
- memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH]));
+ memset(sp->ovec, 0, sizeof(sp->ovec));
if (src) {
int i;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dc5b20bf29cf..d679fa0f44b3 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -116,6 +116,9 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
static void xfrmi_dev_free(struct net_device *dev)
{
+ struct xfrm_if *xi = netdev_priv(dev);
+
+ gro_cells_destroy(&xi->gro_cells);
free_percpu(dev->tstats);
}
@@ -561,9 +564,6 @@ static void xfrmi_get_stats64(struct net_device *dev,
{
int cpu;
- if (!dev->tstats)
- return;
-
for_each_possible_cpu(cpu) {
struct pcpu_sw_netstats *stats;
struct pcpu_sw_netstats tmp;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f094d4b3520d..119a427d9b2b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -632,9 +632,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
break;
}
if (newpos)
- hlist_add_behind(&policy->bydst, newpos);
+ hlist_add_behind_rcu(&policy->bydst, newpos);
else
- hlist_add_head(&policy->bydst, chain);
+ hlist_add_head_rcu(&policy->bydst, chain);
}
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -774,9 +774,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
break;
}
if (newpos)
- hlist_add_behind(&policy->bydst, newpos);
+ hlist_add_behind_rcu(&policy->bydst, newpos);
else
- hlist_add_head(&policy->bydst, chain);
+ hlist_add_head_rcu(&policy->bydst, chain);
__xfrm_policy_link(policy, dir);
/* After previous checking, family can either be AF_INET or AF_INET6 */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 86299efa804a..fd23d5778ea1 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -377,6 +377,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 07548de5c988..251be353f950 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_HPAGE_1M 156
#define KVM_CAP_NESTED_STATE 157
#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
+#define KVM_CAP_MSR_PLATFORM_INFO 159
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 120037496f77..5afb11b30fca 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -36,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void)
__tracing_path_set("", mnt);
- return mnt;
+ return tracing_path;
}
static const char *tracing_path_debugfs_mount(void)
@@ -49,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void)
__tracing_path_set("tracing/", mnt);
- return mnt;
+ return tracing_path;
}
const char *tracing_path_mount(void)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index f6d1a03c7523..e30d20fb482d 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -833,7 +833,7 @@ ifndef NO_JVMTI
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
else
ifneq (,$(wildcard /usr/sbin/alternatives))
- JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+ JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
endif
endif
ifndef JDIR
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5224ade3d5af..0be411695379 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -635,7 +635,7 @@ $(LIBPERF_IN): prepare FORCE
$(LIB_FILE): $(LIBPERF_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
-LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
$(LIBTRACEEVENT): FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 76e12bcd1765..b2188e623e22 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -981,6 +981,7 @@ int cmd_report(int argc, const char **argv)
.id_index = perf_event__process_id_index,
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
+ .event_update = perf_event__process_event_update,
.feature = process_feature_event,
.ordered_events = true,
.ordering_requires_timestamps = true,
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
index d40498f2cb1e..635c09fda1d9 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
@@ -188,7 +188,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xb",
"EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
- "Filter": "filter_band0=1200",
+ "Filter": "filter_band0=12",
"MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_1200mhz_cycles %",
"PerPkg": "1",
@@ -199,7 +199,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xc",
"EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
- "Filter": "filter_band1=2000",
+ "Filter": "filter_band1=20",
"MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_2000mhz_cycles %",
"PerPkg": "1",
@@ -210,7 +210,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd",
"EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
- "Filter": "filter_band2=3000",
+ "Filter": "filter_band2=30",
"MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_3000mhz_cycles %",
"PerPkg": "1",
@@ -221,7 +221,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xe",
"EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
- "Filter": "filter_band3=4000",
+ "Filter": "filter_band3=40",
"MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_4000mhz_cycles %",
"PerPkg": "1",
@@ -232,7 +232,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xb",
"EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band0=1200",
+ "Filter": "edge=1,filter_band0=12",
"MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_1200mhz_cycles %",
"PerPkg": "1",
@@ -243,7 +243,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xc",
"EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band1=2000",
+ "Filter": "edge=1,filter_band1=20",
"MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_2000mhz_cycles %",
"PerPkg": "1",
@@ -254,7 +254,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd",
"EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band2=4000",
+ "Filter": "edge=1,filter_band2=30",
"MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_3000mhz_cycles %",
"PerPkg": "1",
@@ -265,7 +265,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xe",
"EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band3=4000",
+ "Filter": "edge=1,filter_band3=40",
"MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_4000mhz_cycles %",
"PerPkg": "1",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
index 16034bfd06dd..8755693d86c6 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
@@ -187,7 +187,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xb",
"EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
- "Filter": "filter_band0=1200",
+ "Filter": "filter_band0=12",
"MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_1200mhz_cycles %",
"PerPkg": "1",
@@ -198,7 +198,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xc",
"EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
- "Filter": "filter_band1=2000",
+ "Filter": "filter_band1=20",
"MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_2000mhz_cycles %",
"PerPkg": "1",
@@ -209,7 +209,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd",
"EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
- "Filter": "filter_band2=3000",
+ "Filter": "filter_band2=30",
"MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_3000mhz_cycles %",
"PerPkg": "1",
@@ -220,7 +220,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xe",
"EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
- "Filter": "filter_band3=4000",
+ "Filter": "filter_band3=40",
"MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_4000mhz_cycles %",
"PerPkg": "1",
@@ -231,7 +231,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xb",
"EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band0=1200",
+ "Filter": "edge=1,filter_band0=12",
"MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_1200mhz_cycles %",
"PerPkg": "1",
@@ -242,7 +242,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xc",
"EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band1=2000",
+ "Filter": "edge=1,filter_band1=20",
"MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_2000mhz_cycles %",
"PerPkg": "1",
@@ -253,7 +253,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd",
"EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band2=4000",
+ "Filter": "edge=1,filter_band2=30",
"MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_3000mhz_cycles %",
"PerPkg": "1",
@@ -264,7 +264,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xe",
"EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
- "Filter": "edge=1,filter_band3=4000",
+ "Filter": "edge=1,filter_band3=40",
"MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
"MetricName": "freq_ge_4000mhz_cycles %",
"PerPkg": "1",
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 0cd42150f712..bc646185f8d9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max
}
*size += sizeof(struct cpu_map_data);
+ *size = PERF_ALIGN(*size, sizeof(u64));
return zalloc(*size);
}
@@ -1560,26 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
return NULL;
}
-try_again:
+
al->map = map_groups__find(mg, al->addr);
- if (al->map == NULL) {
- /*
- * If this is outside of all known maps, and is a negative
- * address, try to look it up in the kernel dso, as it might be
- * a vsyscall or vdso (which executes in user-mode).
- *
- * XXX This is nasty, we should have a symbol list in the
- * "[vdso]" dso, but for now lets use the old trick of looking
- * in the whole kernel symbol list.
- */
- if (cpumode == PERF_RECORD_MISC_USER && machine &&
- mg != &machine->kmaps &&
- machine__kernel_ip(machine, al->addr)) {
- mg = &machine->kmaps;
- load_map = true;
- goto try_again;
- }
- } else {
+ if (al->map != NULL) {
/*
* Kernel maps might be changed when loading symbols so loading
* must be done prior to using kernel maps.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1a61628a1c12..e596ae358c4d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1089,6 +1089,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->exclude_user = 1;
}
+ if (evsel->own_cpus)
+ evsel->attr.read_format |= PERF_FORMAT_ID;
+
/*
* Apply event specific term settings,
* it overloads any global configuration.
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index afd68524ffa9..7799788f662f 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -930,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
static __u64 pmu_format_max_value(const unsigned long *format)
{
- __u64 w = 0;
- int fbit;
-
- for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS)
- w |= (1ULL << fbit);
+ int w;
- return w;
+ w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+ if (!w)
+ return 0;
+ if (w < 64)
+ return (1ULL << w) - 1;
+ return -1;
}
/*
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 09d6746e6ec8..e767c4a9d4d2 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso,
struct symbol *inline_sym;
char *demangled = NULL;
+ if (!funcname)
+ funcname = "??";
+
if (dso) {
demangled = dso__demangle_sym(dso, 0, funcname);
if (demangled)
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
index a72df93cf1f8..128f0ab24307 100755
--- a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -141,6 +141,10 @@ echo "Import devices from localhost - should work"
src/usbip attach -r localhost -b $busid;
echo "=============================================================="
+# Wait for sysfs file to be updated. Without this sleep, usbip port
+# shows no imported devices.
+sleep 3;
+
echo "List imported devices - expect to see imported devices";
src/usbip port;
echo "=============================================================="
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
new file mode 100644
index 000000000000..88e6c3f43006
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test synthetic_events syntax parser"
+
+echo > synthetic_events
+
+# synthetic event must have a field
+! echo "myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
+# synthetic event must be found in synthetic_events
+grep "myevent[[:space:]]u64 var1" synthetic_events
+
+# it is not possible to add same name event
+! echo "myevent u64 var2" >> synthetic_events
+
+# Non-append open will cleanup all events and add new one
+echo "myevent u64 var2" > synthetic_events
+
+# multiple fields with different spaces
+echo "myevent u64 var1; u64 var2;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ;u64 var2" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+
+# test field types
+echo "myevent u32 var" > synthetic_events
+echo "myevent u16 var" > synthetic_events
+echo "myevent u8 var" > synthetic_events
+echo "myevent s64 var" > synthetic_events
+echo "myevent s32 var" > synthetic_events
+echo "myevent s16 var" > synthetic_events
+echo "myevent s8 var" > synthetic_events
+
+echo "myevent char var" > synthetic_events
+echo "myevent int var" > synthetic_events
+echo "myevent long var" > synthetic_events
+echo "myevent pid_t var" > synthetic_events
+
+echo "myevent unsigned char var" > synthetic_events
+echo "myevent unsigned int var" > synthetic_events
+echo "myevent unsigned long var" > synthetic_events
+grep "myevent[[:space:]]unsigned long var" synthetic_events
+
+# test string type
+echo "myevent char var[10]" > synthetic_events
+grep "myevent[[:space:]]char\[10\] var" synthetic_events
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index cad14cd0ea92..b5277106df1f 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -437,14 +437,19 @@ void enable_fastopen(void)
}
}
-static struct rlimit rlim_old, rlim_new;
+static struct rlimit rlim_old;
static __attribute__((constructor)) void main_ctor(void)
{
getrlimit(RLIMIT_MEMLOCK, &rlim_old);
- rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
- rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
- setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+
+ if (rlim_old.rlim_cur != RLIM_INFINITY) {
+ struct rlimit rlim_new;
+
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
}
static __attribute__((destructor)) void main_dtor(void)