349 files changed, 21980 insertions, 4119 deletions
diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst index d351e880a2f6..a2738050c4f0 100644 --- a/Documentation/core-api/idr.rst +++ b/Documentation/core-api/idr.rst @@ -1,4 +1,4 @@ -.. SPDX-License-Identifier: CC-BY-SA-4.0 +.. SPDX-License-Identifier: GPL-2.0+ ============= ID Allocation diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index e22d8cfea687..5100358177c9 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -1,4 +1,4 @@ -Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY +Micrel KSZ9021/KSZ9031/KSZ9131 Gigabit Ethernet PHY Some boards require special tuning values, particularly when it comes to clock delays. You can specify clock delay values in the PHY OF @@ -64,6 +64,32 @@ KSZ9031: Attention: The link partner must be configurable as slave otherwise no link will be established. +KSZ9131: + + All skew control options are specified in picoseconds. The increment + step is 100ps. Unlike KSZ9031, the values represent picosecond delays. + A negative value can be assigned as rxc-skew-psec = <(-100)>;. + + Optional properties: + + Range of the value -700 to 2400, default value 0: + + - rxc-skew-psec : Skew control of RX clock pad + - txc-skew-psec : Skew control of TX clock pad + + Range of the value -700 to 800, default value 0: + + - rxdv-skew-psec : Skew control of RX CTL pad + - txen-skew-psec : Skew control of TX CTL pad + - rxd0-skew-psec : Skew control of RX data 0 pad + - rxd1-skew-psec : Skew control of RX data 1 pad + - rxd2-skew-psec : Skew control of RX data 2 pad + - rxd3-skew-psec : Skew control of RX data 3 pad + - txd0-skew-psec : Skew control of TX data 0 pad + - txd1-skew-psec : Skew control of TX data 1 pad + - txd2-skew-psec : Skew control of TX data 2 pad + - txd3-skew-psec : Skew control of TX data 3 pad + Examples: mdio { diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 2d239770b95f..eb0754475dd0 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -70,12 +70,6 @@ driver.txt - Softnet driver issues. ena.txt - info on Amazon's Elastic Network Adapter (ENA) -e100.txt - - info on Intel's EtherExpress PRO/100 line of 10/100 boards -e1000.txt - - info on Intel's E1000 line of gigabit ethernet boards -e1000e.txt - - README for the Intel Gigabit Ethernet Driver (e1000e). eql.txt - serial IP load balancing fib_trie.txt @@ -94,16 +88,8 @@ generic_netlink.txt - info on Generic Netlink gianfar.txt - Gianfar Ethernet Driver. -i40e.txt - - README for the Intel Ethernet Controller XL710 Driver (i40e). -iavf.txt - - README for the Intel Ethernet Adaptive Virtual Function Driver (iavf). ieee802154.txt - Linux IEEE 802.15.4 implementation, API and drivers -igb.txt - - README for the Intel Gigabit Ethernet Driver (igb). -igbvf.txt - - README for the Intel Gigabit Ethernet Driver (igbvf). ip-sysctl.txt - /proc/sys/net/ipv4/* variables ip_dynaddr.txt @@ -120,12 +106,6 @@ ipvs-sysctl.txt - Per-inode explanation of the /proc/sys/net/ipv4/vs interface. irda.txt - where to get IrDA (infrared) utilities and info for Linux. -ixgb.txt - - README for the Intel 10 Gigabit Ethernet Driver (ixgb). -ixgbe.txt - - README for the Intel 10 Gigabit Ethernet Driver (ixgbe). -ixgbevf.txt - - README for the Intel Virtual Function (VF) Driver (ixgbevf). l2tp.txt - User guide to the L2TP tunnel protocol.
lapb-module.txt diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst index f81111eba9c5..5e2839b4ec92 100644 --- a/Documentation/networking/e100.rst +++ b/Documentation/networking/e100.rst @@ -1,4 +1,5 @@ -============================================================== +.. SPDX-License-Identifier: GPL-2.0+ + Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters ============================================================== diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst index f10dd4086921..6379d4d20771 100644 --- a/Documentation/networking/e1000.rst +++ b/Documentation/networking/e1000.rst @@ -1,4 +1,5 @@ -=========================================================== +.. SPDX-License-Identifier: GPL-2.0+ + Linux* Base Driver for Intel(R) Ethernet Network Connection =========================================================== diff --git a/Documentation/networking/e1000e.rst b/Documentation/networking/e1000e.rst new file mode 100644 index 000000000000..33554e5416c5 --- /dev/null +++ b/Documentation/networking/e1000e.rst @@ -0,0 +1,382 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Driver for Intel(R) Ethernet Network Connection +====================================================== + +Intel Gigabit Linux driver. +Copyright(c) 2008-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Additional Configurations +- Support + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +https://www.intel.com/support + + +Command Line Parameters +======================= +If the driver is built as a module, the following optional parameters can be +set by entering them on the command line with the modprobe command using this +syntax:: + + modprobe e1000e [<option>=<VAL1>,<VAL2>,...] + +There needs to be a <VAL#> for each network port in the system supported by +this driver. The values will be applied to each instance, in function order. +For example:: + + modprobe e1000e InterruptThrottleRate=16000,16000 + +In this case, there are two network ports supported by e1000e in the system. +The default value for each parameter is generally the recommended setting, +unless otherwise noted. + +NOTE: A descriptor describes a data buffer and attributes related to the data +buffer. This information is accessed by the hardware. + +InterruptThrottleRate +--------------------- +:Valid Range: 0,1,3,4,100-100000 +:Default Value: 3 + +Interrupt Throttle Rate controls the number of interrupts each interrupt +vector can generate per second. Increasing ITR lowers latency at the cost of +increased CPU utilization, though it may help throughput in some circumstances. + +Setting InterruptThrottleRate to a value greater than or equal to 100 +will program the adapter to send out a maximum of that many interrupts +per second, even if more packets have come in. This reduces interrupt +load on the system and can lower CPU utilization under heavy load, +but will increase latency as packets are not processed as quickly. + +The default behaviour of the driver previously assumed a static +InterruptThrottleRate value of 8000, providing a good fallback value for +all traffic types, but lacking in small packet performance and latency. +The hardware can handle many more small packets per second however, and +for this reason an adaptive interrupt moderation algorithm was implemented.
+ +The driver has two adaptive modes (setting 1 or 3) in which +it dynamically adjusts the InterruptThrottleRate value based on the traffic +that it receives. After determining the type of incoming traffic in the last +timeframe, it will adjust the InterruptThrottleRate to an appropriate value +for that traffic. + +The algorithm classifies the incoming traffic every interval into +classes. Once the class is determined, the InterruptThrottleRate value is +adjusted to suit that traffic type the best. There are three classes defined: +"Bulk traffic", for large amounts of packets of normal size; "Low latency", +for small amounts of traffic and/or a significant percentage of small +packets; and "Lowest latency", for almost completely small packets or +minimal traffic. + + - 0: Off + Turns off any interrupt moderation and may improve small packet latency. + However, this is generally not suitable for bulk throughput traffic due + to the increased CPU utilization of the higher interrupt rate. + - 1: Dynamic mode + This mode attempts to moderate interrupts per vector while maintaining + very low latency. This can sometimes cause extra CPU utilization. If + planning on deploying e1000e in a latency-sensitive environment, this + parameter should be considered. + - 3: Dynamic Conservative mode (default) + In dynamic conservative mode, the InterruptThrottleRate value is set to + 4000 for traffic that falls in class "Bulk traffic". If traffic falls in + the "Low latency" or "Lowest latency" class, the InterruptThrottleRate is + increased stepwise to 20000. This default mode is suitable for most + applications. + - 4: Simplified Balancing mode + In simplified mode the interrupt rate is based on the ratio of TX and + RX traffic. If the bytes per second rate is approximately equal, the + interrupt rate will drop as low as 2000 interrupts per second. If the + traffic is mostly transmit or mostly receive, the interrupt rate could + be as high as 8000. + - 100-100000: + Setting InterruptThrottleRate to a value greater than or equal to 100 + will program the adapter to send at most that many interrupts per second, + even if more packets have come in. This reduces interrupt load on the + system and can lower CPU utilization under heavy load, but will increase + latency as packets are not processed as quickly. + +NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and +RxAbsIntDelay parameters. In other words, minimizing the receive and/or +transmit absolute delays does not force the controller to generate more +interrupts than what the Interrupt Throttle Rate allows. + +RxIntDelay +---------- +:Valid Range: 0-65535 (0=off) +:Default Value: 0 + +This value delays the generation of receive interrupts in units of 1.024 +microseconds. Receive interrupt reduction can improve CPU efficiency if +properly tuned for specific network traffic. Increasing this value adds extra +latency to frame reception and can end up decreasing the throughput of TCP +traffic. If the system is reporting dropped receives, this value may be set +too high, causing the driver to run out of available receive descriptors. + +CAUTION: When setting RxIntDelay to a value other than 0, adapters may hang +(stop transmitting) under certain network conditions. If this occurs a NETDEV +WATCHDOG message is logged in the system event log. In addition, the +controller is automatically reset, restoring the network connection. To +eliminate the potential for the hang ensure that RxIntDelay is set to 0.
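+ +As with the other parameters, RxIntDelay takes one comma-separated value per +network port. A minimal sketch, assuming a hypothetical two-port system and +keeping the recommended value of 0 explicit at load time:: + + modprobe e1000e RxIntDelay=0,0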
+ +RxAbsIntDelay +------------- +:Valid Range: 0-65535 (0=off) +:Default Value: 8 + +This value, in units of 1.024 microseconds, limits the delay in which a +receive interrupt is generated. This value ensures that an interrupt is +generated after the initial packet is received within the set amount of time, +which is useful only if RxIntDelay is non-zero. Proper tuning, along with +RxIntDelay, may improve traffic throughput in specific network conditions. + +TxIntDelay +---------- +:Valid Range: 0-65535 (0=off) +:Default Value: 8 + +This value delays the generation of transmit interrupts in units of 1.024 +microseconds. Transmit interrupt reduction can improve CPU efficiency if +properly tuned for specific network traffic. If the system is reporting +dropped transmits, this value may be set too high, causing the driver to run +out of available transmit descriptors. + +TxAbsIntDelay +------------- +:Valid Range: 0-65535 (0=off) +:Default Value: 32 + +This value, in units of 1.024 microseconds, limits the delay in which a +transmit interrupt is generated. It is useful only if TxIntDelay is non-zero. +It ensures that an interrupt is generated after the initial packet is sent on +the wire within the set amount of time. Proper tuning, along with TxIntDelay, +may improve traffic throughput in specific network conditions. + +copybreak +--------- +:Valid Range: 0-xxxxxxx (0=off) +:Default Value: 256 + +The driver copies all packets below or equaling this size to a fresh receive +buffer before handing them up the stack. +This parameter differs from other parameters because it is a single (not 1,1,1 +etc.) parameter applied to all driver instances and it is also available +during runtime at /sys/module/e1000e/parameters/copybreak. + +To use copybreak, type:: + + modprobe e1000e copybreak=128 + +SmartPowerDownEnable +-------------------- +:Valid Range: 0,1 +:Default Value: 0 (disabled) + +Allows the PHY to turn off in lower power states. The user can set this +parameter on supported chipsets. + +KumeranLockLoss +--------------- +:Valid Range: 0,1 +:Default Value: 1 (enabled) + +This workaround skips resetting the PHY at shutdown for the initial silicon +releases of ICH8 systems. + +IntMode +------- +:Valid Range: 0-2 +:Default Value: 0 + + +-------+----------------+ + | Value | Interrupt Mode | + +=======+================+ + | 0 | Legacy | + +-------+----------------+ + | 1 | MSI | + +-------+----------------+ + | 2 | MSI-X | + +-------+----------------+ + +IntMode allows load-time control over the type of interrupt registered for by +the driver. MSI-X is required for multiple queue support, and some kernels and +combinations of kernel .config options will force a lower level of interrupt +support. + +This command will show different values for each type of interrupt:: + + cat /proc/interrupts + +CrcStripping +------------ +:Valid Range: 0,1 +:Default Value: 1 (enabled) + +Strip the CRC from received packets before sending up the network stack. If +you have a machine with a BMC enabled but cannot receive IPMI traffic after +loading or enabling the driver, try disabling this feature. + +WriteProtectNVM +--------------- +:Valid Range: 0,1 +:Default Value: 1 (enabled) + +If set to 1, configure the hardware to ignore all write/erase cycles to the +GbE region in the ICHx NVM (in order to prevent accidental corruption of the +NVM). This feature can be disabled by setting the parameter to 0 during initial +driver load.
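+ +For example, a hypothetical initial load that leaves the NVM writable (use +with care; note the power-cycle requirement in the NOTE below):: + + modprobe e1000e WriteProtectNVM=0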
+ +NOTE: The machine must be power cycled (full off/on) when enabling NVM writes +via setting the parameter to zero. Once the NVM has been locked (via the +parameter at 1 when the driver loads) it cannot be unlocked except via power +cycle. + +Debug +----- +:Valid Range: 0-16 (0=none,...,16=all) +:Default Value: 0 + +This parameter adjusts the level of debug messages displayed in the system logs. + + +Additional Features and Configurations +====================================== + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <x> is the interface number:: + + ifconfig eth<x> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + ip link set mtu 9000 dev eth<x> + ip link set up dev eth<x> + +This setting is not saved across reboots. The setting change can be made +permanent by adding 'MTU=9000' to the file: + +- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x> +- For SLES: /etc/sysconfig/network/<config_file> + +NOTE: The maximum MTU setting for Jumbo Frames is 8996. This value coincides +with the maximum Jumbo Frames size of 9018 bytes. + +NOTE: Using Jumbo frames at 10 or 100 Mbps is not supported and may result in +poor performance or loss of link. + +NOTE: The following adapters limit Jumbo Frames sized packets to a maximum of +4088 bytes: + + - Intel(R) 82578DM Gigabit Network Connection + - Intel(R) 82577LM Gigabit Network Connection + +The following adapters do not support Jumbo Frames: + + - Intel(R) PRO/1000 Gigabit Server Adapter + - Intel(R) PRO/1000 PM Network Connection + - Intel(R) 82562G 10/100 Network Connection + - Intel(R) 82562G-2 10/100 Network Connection + - Intel(R) 82562GT 10/100 Network Connection + - Intel(R) 82562GT-2 10/100 Network Connection + - Intel(R) 82562V 10/100 Network Connection + - Intel(R) 82562V-2 10/100 Network Connection + - Intel(R) 82566DC Gigabit Network Connection + - Intel(R) 82566DC-2 Gigabit Network Connection + - Intel(R) 82566DM Gigabit Network Connection + - Intel(R) 82566MC Gigabit Network Connection + - Intel(R) 82566MM Gigabit Network Connection + - Intel(R) 82567V-3 Gigabit Network Connection + - Intel(R) 82577LC Gigabit Network Connection + - Intel(R) 82578DC Gigabit Network Connection + +NOTE: Jumbo Frames cannot be configured on an 82579-based Network device if +MACSec is enabled on the system. + + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: + +https://www.kernel.org/pub/software/network/ethtool/ + +NOTE: When validating enable/disable tests on some parts (for example, 82578), +it is necessary to add a few seconds between tests when working with ethtool. + + +Speed and Duplex Configuration +------------------------------ +In addressing speed and duplex configuration issues, you need to distinguish +between copper-based adapters and fiber-based adapters. + +In the default mode, an Intel(R) Ethernet Network Adapter using copper +connections will attempt to auto-negotiate with its link partner to determine +the best setting. 
If the adapter cannot establish link with the link partner +using auto-negotiation, you may need to manually configure the adapter and link +partner to identical settings to establish link and pass packets. This should +only be needed when attempting to link with an older switch that does not +support auto-negotiation or one that has been forced to a specific speed or +duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds +and higher cannot be forced. Use the autonegotiation advertising setting to +manually set devices for 1 Gbps and higher. + +Speed, duplex, and autonegotiation advertising are configured through the +ethtool* utility. + +Caution: Only experienced network administrators should force speed and duplex +or change autonegotiation advertising manually. The settings at the switch must +always match the adapter settings. Adapter performance may suffer or your +adapter may not operate if you configure the adapter differently from your +switch. + +An Intel(R) Ethernet Network Adapter using fiber-based connections, however, +will not attempt to auto-negotiate with its link partner since those adapters +operate only in full duplex and only at their native speed. + + +Enabling Wake on LAN* (WoL) +--------------------------- +WoL is configured through the ethtool* utility. + +WoL will be enabled on the system during the next shut down or reboot. For +this driver version, in order to enable WoL, the e1000e driver must be loaded +prior to shutting down or suspending the system. + +NOTE: Wake on LAN is only supported on port A for the following devices: +- Intel(R) PRO/1000 PT Dual Port Network Connection +- Intel(R) PRO/1000 PT Dual Port Server Connection +- Intel(R) PRO/1000 PT Dual Port Server Adapter +- Intel(R) PRO/1000 PF Dual Port Server Adapter +- Intel(R) PRO/1000 PT Quad Port Server Adapter +- Intel(R) Gigabit PT Quad Port Server ExpressModule + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/e1000e.txt b/Documentation/networking/e1000e.txt deleted file mode 100644 index 12089547baed..000000000000 --- a/Documentation/networking/e1000e.txt +++ /dev/null @@ -1,312 +0,0 @@ -Linux* Driver for Intel(R) Ethernet Network Connection -====================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Command Line Parameters -- Additional Configurations -- Support - -Identifying Your Adapter -======================== - -The e1000e driver supports all PCI Express Intel(R) Gigabit Network -Connections, except those that are 82575, 82576 and 82580-based*. - -* NOTE: The Intel(R) PRO/1000 P Dual Port Server Adapter is supported by - the e1000 driver, not the e1000e driver due to the 82546 part being used - behind a PCI Express bridge. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -For the latest Intel network drivers for Linux, refer to the following -website. 
In the search field, enter your adapter name or type, or use the -networking link on the left to search for your adapter: - - http://support.intel.com/support/go/network/adapter/home.htm - -Command Line Parameters -======================= - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -NOTES: For more information about the InterruptThrottleRate, - RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay - parameters, see the application note at: - http://www.intel.com/design/network/applnots/ap450.htm - -InterruptThrottleRate ---------------------- -Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative, - 4=simplified balancing) -Default Value: 3 - -The driver can limit the amount of interrupts per second that the adapter -will generate for incoming packets. It does this by writing a value to the -adapter that is based on the maximum amount of interrupts that the adapter -will generate per second. - -Setting InterruptThrottleRate to a value greater or equal to 100 -will program the adapter to send out a maximum of that many interrupts -per second, even if more packets have come in. This reduces interrupt -load on the system and can lower CPU utilization under heavy load, -but will increase latency as packets are not processed as quickly. - -The default behaviour of the driver previously assumed a static -InterruptThrottleRate value of 8000, providing a good fallback value for -all traffic types, but lacking in small packet performance and latency. -The hardware can handle many more small packets per second however, and -for this reason an adaptive interrupt moderation algorithm was implemented. - -The driver has two adaptive modes (setting 1 or 3) in which -it dynamically adjusts the InterruptThrottleRate value based on the traffic -that it receives. After determining the type of incoming traffic in the last -timeframe, it will adjust the InterruptThrottleRate to an appropriate value -for that traffic. - -The algorithm classifies the incoming traffic every interval into -classes. Once the class is determined, the InterruptThrottleRate value is -adjusted to suit that traffic type the best. There are three classes defined: -"Bulk traffic", for large amounts of packets of normal size; "Low latency", -for small amounts of traffic and/or a significant percentage of small -packets; and "Lowest latency", for almost completely small packets or -minimal traffic. - -In dynamic conservative mode, the InterruptThrottleRate value is set to 4000 -for traffic that falls in class "Bulk traffic". If traffic falls in the "Low -latency" or "Lowest latency" class, the InterruptThrottleRate is increased -stepwise to 20000. This default mode is suitable for most applications. - -For situations where low latency is vital such as cluster or -grid computing, the algorithm can reduce latency even more when -InterruptThrottleRate is set to mode 1. In this mode, which operates -the same as mode 3, the InterruptThrottleRate will be increased stepwise to -70000 for traffic in class "Lowest latency". - -In simplified mode the interrupt rate is based on the ratio of TX and -RX traffic. If the bytes per second rate is approximately equal, the -interrupt rate will drop as low as 2000 interrupts per second. If the -traffic is mostly transmit or mostly receive, the interrupt rate could -be as high as 8000. 
- -Setting InterruptThrottleRate to 0 turns off any interrupt moderation -and may improve small packet latency, but is generally not suitable -for bulk throughput traffic. - -NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and - RxAbsIntDelay parameters. In other words, minimizing the receive - and/or transmit absolute delays does not force the controller to - generate more interrupts than what the Interrupt Throttle Rate - allows. - -NOTE: When e1000e is loaded with default settings and multiple adapters - are in use simultaneously, the CPU utilization may increase non- - linearly. In order to limit the CPU utilization without impacting - the overall throughput, we recommend that you load the driver as - follows: - - modprobe e1000e InterruptThrottleRate=3000,3000,3000 - - This sets the InterruptThrottleRate to 3000 interrupts/sec for - the first, second, and third instances of the driver. The range - of 2000 to 3000 interrupts per second works on a majority of - systems and is a good starting point, but the optimal value will - be platform-specific. If CPU utilization is not a concern, use - RX_POLLING (NAPI) and default driver settings. - -RxIntDelay ----------- -Valid Range: 0-65535 (0=off) -Default Value: 0 - -This value delays the generation of receive interrupts in units of 1.024 -microseconds. Receive interrupt reduction can improve CPU efficiency if -properly tuned for specific network traffic. Increasing this value adds -extra latency to frame reception and can end up decreasing the throughput -of TCP traffic. If the system is reporting dropped receives, this value -may be set too high, causing the driver to run out of available receive -descriptors. - -CAUTION: When setting RxIntDelay to a value other than 0, adapters may - hang (stop transmitting) under certain network conditions. If - this occurs a NETDEV WATCHDOG message is logged in the system - event log. In addition, the controller is automatically reset, - restoring the network connection. To eliminate the potential - for the hang ensure that RxIntDelay is set to 0. - -RxAbsIntDelay -------------- -Valid Range: 0-65535 (0=off) -Default Value: 8 - -This value, in units of 1.024 microseconds, limits the delay in which a -receive interrupt is generated. Useful only if RxIntDelay is non-zero, -this value ensures that an interrupt is generated after the initial -packet is received within the set amount of time. Proper tuning, -along with RxIntDelay, may improve traffic throughput in specific network -conditions. - -TxIntDelay ----------- -Valid Range: 0-65535 (0=off) -Default Value: 8 - -This value delays the generation of transmit interrupts in units of -1.024 microseconds. Transmit interrupt reduction can improve CPU -efficiency if properly tuned for specific network traffic. If the -system is reporting dropped transmits, this value may be set too high -causing the driver to run out of available transmit descriptors. - -TxAbsIntDelay -------------- -Valid Range: 0-65535 (0=off) -Default Value: 32 - -This value, in units of 1.024 microseconds, limits the delay in which a -transmit interrupt is generated. Useful only if TxIntDelay is non-zero, -this value ensures that an interrupt is generated after the initial -packet is sent on the wire within the set amount of time. Proper tuning, -along with TxIntDelay, may improve traffic throughput in specific -network conditions. 
- -Copybreak ---------- -Valid Range: 0-xxxxxxx (0=off) -Default Value: 256 - -Driver copies all packets below or equaling this size to a fresh RX -buffer before handing it up the stack. - -This parameter is different than other parameters, in that it is a -single (not 1,1,1 etc.) parameter applied to all driver instances and -it is also available during runtime at -/sys/module/e1000e/parameters/copybreak - -SmartPowerDownEnable --------------------- -Valid Range: 0-1 -Default Value: 0 (disabled) - -Allows PHY to turn off in lower power states. The user can set this parameter -in supported chipsets. - -KumeranLockLoss ---------------- -Valid Range: 0-1 -Default Value: 1 (enabled) - -This workaround skips resetting the PHY at shutdown for the initial -silicon releases of ICH8 systems. - -IntMode -------- -Valid Range: 0-2 (0=legacy, 1=MSI, 2=MSI-X) -Default Value: 2 - -Allows changing the interrupt mode at module load time, without requiring a -recompile. If the driver load fails to enable a specific interrupt mode, the -driver will try other interrupt modes, from least to most compatible. The -interrupt order is MSI-X, MSI, Legacy. If specifying MSI (IntMode=1) -interrupts, only MSI and Legacy will be attempted. - -CrcStripping ------------- -Valid Range: 0-1 -Default Value: 1 (enabled) - -Strip the CRC from received packets before sending up the network stack. If -you have a machine with a BMC enabled but cannot receive IPMI traffic after -loading or enabling the driver, try disabling this feature. - -WriteProtectNVM ---------------- -Valid Range: 0,1 -Default Value: 1 - -If set to 1, configure the hardware to ignore all write/erase cycles to the -GbE region in the ICHx NVM (in order to prevent accidental corruption of the -NVM). This feature can be disabled by setting the parameter to 0 during initial -driver load. -NOTE: The machine must be power cycled (full off/on) when enabling NVM writes -via setting the parameter to zero. Once the NVM has been locked (via the -parameter at 1 when the driver loads) it cannot be unlocked except via power -cycle. - -Additional Configurations -========================= - - Jumbo Frames - ------------ - Jumbo Frames support is enabled by changing the MTU to a value larger than - the default of 1500. Use the ifconfig command to increase the MTU size. - For example: - - ifconfig eth<x> mtu 9000 up - - This setting is not saved across reboots. - - Notes: - - - The maximum MTU setting for Jumbo Frames is 9216. This value coincides - with the maximum Jumbo Frames size of 9234 bytes. - - - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in - poor performance or loss of link. - - - Some adapters limit Jumbo Frames sized packets to a maximum of - 4096 bytes and some adapters do not support Jumbo Frames. - - - Jumbo Frames cannot be configured on an 82579-based Network device, if - MACSec is enabled on the system. - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. We - strongly recommend downloading the latest version of ethtool at: - - https://kernel.org/pub/software/network/ethtool/ - - NOTE: When validating enable/disable tests on some parts (82578, for example) - you need to add a few seconds between tests when working with ethtool. - - Speed and Duplex - ---------------- - Speed and Duplex are configured through the ethtool* utility. For - instructions, refer to the ethtool man page. 
- - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is configured through the ethtool* utility. For instructions on - enabling WoL with ethtool, refer to the ethtool man page. - - WoL will be enabled on the system during the next shut down or reboot. - For this driver version, in order to enable WoL, the e1000e driver must be - loaded when shutting down or rebooting the system. - - In most cases Wake On LAN is only supported on port A for multiple port - adapters. To verify if a port supports Wake on Lan run ethtool eth<X>. - -Support -======= - -For general information, go to the Intel support website at: - - www.intel.com/support/ - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/fm10k.rst b/Documentation/networking/fm10k.rst new file mode 100644 index 000000000000..bf5e5942f28d --- /dev/null +++ b/Documentation/networking/fm10k.rst @@ -0,0 +1,141 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for Intel(R) Ethernet Multi-host Controller +============================================================== + +August 20, 2018 +Copyright(c) 2015-2018 Intel Corporation. + +Contents +======== +- Identifying Your Adapter +- Additional Configurations +- Performance Tuning +- Known Issues +- Support + +Identifying Your Adapter +======================== +The driver in this release is compatible with devices based on the Intel(R) +Ethernet Multi-host Controller. + +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Flow Control +------------ +The Intel(R) Ethernet Switch Host Interface Driver does not support Flow +Control. It will not send pause frames. This may result in dropped frames. + + +Virtual Functions (VFs) +----------------------- +Use sysfs to enable VFs. +Valid Range: 0-64 + +For example:: + + echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs //enable VFs + echo 0 > /sys/class/net/$dev/device/sriov_numvfs //disable VFs + +NOTE: Neither the device nor the driver controls how VFs are mapped into config +space. Bus layout will vary by operating system. On operating systems that +support it, you can check sysfs to find the mapping. + +NOTE: When SR-IOV mode is enabled, hardware VLAN filtering and VLAN tag +stripping/insertion will remain enabled. Please remove the old VLAN filter +before the new VLAN filter is added. For example:: + + ip link set eth0 vf 0 vlan 100 // set vlan 100 for VF 0 + ip link set eth0 vf 0 vlan 0 // Delete vlan 100 + ip link set eth0 vf 0 vlan 200 // set a new vlan 200 for VF 0 + + +Additional Features and Configurations +====================================== + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <x> is the interface number:: + + ifconfig eth<x> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + ip link set mtu 9000 dev eth<x> + ip link set up dev eth<x> + +This setting is not saved across reboots.
The setting change can be made +permanent by adding 'MTU=9000' to the file: + +- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x> +- For SLES: /etc/sysconfig/network/<config_file> + +NOTE: The maximum MTU setting for Jumbo Frames is 15342. This value coincides +with the maximum Jumbo Frames size of 15364 bytes. + +NOTE: This driver will attempt to use multiple page sized buffers to receive +each jumbo packet. This should help to avoid buffer starvation issues when +allocating receive packets. + + +Generic Receive Offload, aka GRO +-------------------------------- +The driver supports the in-kernel software implementation of GRO. GRO has +shown that by coalescing Rx traffic into larger chunks of data, CPU +utilization can be significantly reduced when under large Rx load. GRO is an +evolution of the previously-used LRO interface. GRO is able to coalesce +other protocols besides TCP. It's also safe to use with configurations that +are problematic for LRO, namely bridging and iSCSI. + + + +Supported ethtool Commands and Options for Filtering +---------------------------------------------------- +-n --show-nfc + Retrieves the receive network flow classification configurations. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 + Retrieves the hash options for the specified network traffic type. + +-N --config-nfc + Configures the receive network flow classification. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r + Configures the hash options for the specified network traffic type. + +- udp4: UDP over IPv4 +- udp6: UDP over IPv6 +- f Hash on bytes 0 and 1 of the Layer 4 header of the rx packet. +- n Hash on bytes 2 and 3 of the Layer 4 header of the rx packet. + + +Known Issues/Troubleshooting +============================ + +Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS under Linux KVM +--------------------------------------------------------------------------------------- +KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This +includes traditional PCIe devices, as well as SR-IOV-capable devices based on +the Intel Ethernet Controller XL710. + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/i40e.rst b/Documentation/networking/i40e.rst new file mode 100644 index 000000000000..0cc16c525d10 --- /dev/null +++ b/Documentation/networking/i40e.rst @@ -0,0 +1,770 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series +================================================================== + +Intel 40 Gigabit Linux driver. +Copyright(c) 1999-2018 Intel Corporation. + +Contents +======== + +- Overview +- Identifying Your Adapter +- Intel(R) Ethernet Flow Director +- Additional Configurations +- Known Issues +- Support + + +Driver information can be obtained using ethtool, lspci, and ifconfig. +Instructions on updating ethtool can be found in the section Additional +Configurations later in this document. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel adapter. 
All hardware requirements listed apply to use +with Linux. + + +Identifying Your Adapter +======================== +The driver is compatible with devices based on the following: + + * Intel(R) Ethernet Controller X710 + * Intel(R) Ethernet Controller XL710 + * Intel(R) Ethernet Network Connection X722 + * Intel(R) Ethernet Controller XXV710 + +For the best performance, make sure the latest NVM/FW is installed on your +device. + +For information on how to identify your adapter, and for the latest NVM/FW +images and Intel network drivers, refer to the Intel Support website: +https://www.intel.com/support + +SFP+ and QSFP+ Devices +---------------------- +For information about supported media, refer to this document: +https://www.intel.com/content/dam/www/public/us/en/documents/release-notes/xl710-ethernet-controller-feature-matrix.pdf + +NOTE: Some adapters based on the Intel(R) Ethernet Controller 700 Series only +support Intel Ethernet Optics modules. On these adapters, other modules are not +supported and will not function. In all cases Intel recommends using Intel +Ethernet Optics; other modules may function but are not validated by Intel. +Contact Intel for supported media types. + +NOTE: For connections based on Intel(R) Ethernet Controller 700 Series, support +is dependent on your system board. Please see your vendor for details. + +NOTE: In systems that do not have adequate airflow to cool the adapter and +optical modules, you must use high temperature optical modules. + +Virtual Functions (VFs) +----------------------- +Use sysfs to enable VFs. For example:: + + #echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs #enable VFs + #echo 0 > /sys/class/net/$dev/device/sriov_numvfs #disable VFs + +For example, the following instructions will configure PF eth0 and the first VF +on VLAN 10:: + + $ ip link set dev eth0 vf 0 vlan 10 + +VLAN Tag Packet Steering +------------------------ +Allows you to send all packets with a specific VLAN tag to a particular SR-IOV +virtual function (VF). Further, this feature allows you to designate a +particular VF as trusted, and allows that trusted VF to request selective +promiscuous mode on the Physical Function (PF). + +To set a VF as trusted or untrusted, enter the following command in the +Hypervisor:: + + # ip link set dev eth0 vf 1 trust [on|off] + +Once the VF is designated as trusted, use the following commands in the VM to +set the VF to promiscuous mode. + +:: + + For promiscuous all: + #ip link set eth2 promisc on + Where eth2 is a VF interface in the VM + + For promiscuous Multicast: + #ip link set eth2 allmulticast on + Where eth2 is a VF interface in the VM + +NOTE: By default, the ethtool priv-flag vf-true-promisc-support is set to +"off", meaning that promiscuous mode for the VF will be limited. To set the +promiscuous mode for the VF to true promiscuous and allow the VF to see all +ingress traffic, use the following command:: + + #ethtool --set-priv-flags p261p1 vf-true-promisc-support on + +The vf-true-promisc-support priv-flag does not enable promiscuous mode; rather, +it designates which type of promiscuous mode (limited or true) you will get +when you enable promiscuous mode using the ip link commands above. Note that +this is a global setting that affects the entire device. However, the +vf-true-promisc-support priv-flag is only exposed to the first PF of the +device. The PF remains in limited promiscuous mode (unless it is in MFP mode) +regardless of the vf-true-promisc-support setting.
+ +Now add a VLAN interface on the VF interface:: + + #ip link add link eth2 name eth2.100 type vlan id 100 + +Note that the order in which you set the VF to promiscuous mode and add the +VLAN interface does not matter (you can do either first). The end result in +this example is that the VF will get all traffic that is tagged with VLAN 100. + +Intel(R) Ethernet Flow Director +------------------------------- +The Intel Ethernet Flow Director performs the following tasks: + +- Directs receive packets according to their flows to different queues. +- Enables tight control on routing a flow in the platform. +- Matches flows and CPU cores for flow affinity. +- Supports multiple parameters for flexible flow classification and load + balancing (in SFP mode only). + +NOTE: The Linux i40e driver supports the following flow types: IPv4, TCPv4, and +UDPv4. For a given flow type, it supports valid combinations of IP addresses +(source or destination) and UDP/TCP ports (source and destination). For +example, you can supply only a source IP address, a source IP address and a +destination port, or any combination of one or more of these four parameters. + +NOTE: The Linux i40e driver allows you to filter traffic based on a +user-defined flexible two-byte pattern and offset by using the ethtool user-def +and mask fields. Only L3 and L4 flow types are supported for user-defined +flexible filters. For a given flow type, you must clear all Intel Ethernet Flow +Director filters before changing the input set (for that flow type). + +To enable or disable the Intel Ethernet Flow Director:: + + # ethtool -K ethX ntuple <on|off> + +When disabling ntuple filters, all the user programmed filters are flushed from +the driver cache and hardware. All needed filters must be re-added when ntuple +is re-enabled. + +To add a filter that directs packets to queue 2, use the -U or -N switch:: + + # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1] + +To set a filter using only the source and destination IP address:: + + # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 action 2 [loc 1] + +To see the list of filters currently present:: + + # ethtool <-u|-n> ethX + +Application Targeted Routing (ATR) Perfect Filters +-------------------------------------------------- +ATR is enabled by default when the kernel is in multiple transmit queue mode. +An ATR Intel Ethernet Flow Director filter rule is added when a TCP-IP flow +starts and is deleted when the flow ends. When a TCP-IP Intel Ethernet Flow +Director rule is added from ethtool (Sideband filter), ATR is turned off by the +driver. To re-enable ATR, the sideband can be disabled with the ethtool -K +option. For example:: + + ethtool -K [adapter] ntuple [off|on] + +If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a +TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is +automatically re-enabled. + +Packets that match the ATR rules are counted in fdir_atr_match stats in +ethtool, which also can be used to verify whether ATR rules still exist. + +Sideband Perfect Filters +------------------------ +Sideband Perfect Filters are used to direct traffic that matches specified +characteristics. They are enabled through ethtool's ntuple interface.
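+ +Since Sideband Perfect Filters use the ntuple interface described above, make +sure ntuple is enabled first (ethX is a placeholder interface name):: + + # ethtool -K ethX ntuple on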
To add a +new filter use the following command:: + + ethtool -U <device> flow-type <type> src-ip <ip> dst-ip <ip> src-port <port> \ + dst-port <port> action <queue> + +Where: + <device> - the ethernet device to program + <type> - can be ip4, tcp4, udp4, or sctp4 + <ip> - the ip address to match on + <port> - the port number to match on + <queue> - the queue to direct traffic towards (-1 discards matching traffic) + +Use the following command to display all of the active filters:: + + ethtool -u <device> + +Use the following command to delete a filter:: + + ethtool -U <device> delete <N> + +Where <N> is the filter id displayed when printing all the active filters, and +may also have been specified using "loc <N>" when adding the filter. + +The following example matches TCP traffic sent from 192.168.0.1, port 5300, +directed to 192.168.0.5, port 80, and sends it to queue 7:: + + ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 \ + src-port 5300 dst-port 80 action 7 + +For each flow-type, the programmed filters must all have the same matching +input set. For example, issuing the following two commands is acceptable:: + + ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7 + ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10 + +Issuing the next two commands, however, is not acceptable, since the first +specifies src-ip and the second specifies dst-ip:: + + ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7 + ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10 + +The second command will fail with an error. You may program multiple filters +with the same fields, using different values, but, on one device, you may not +program two tcp4 filters with different matching fields. + +Matching on a sub-portion of a field is not supported by the i40e driver, thus +partial mask fields are not supported. + +The driver also supports matching user-defined data within the packet payload. +This flexible data is specified using the "user-def" field of the ethtool +command in the following way: + ++----------------------------+--------------------------+ +| 31 28 24 20 16 | 15 12 8 4 0 | ++----------------------------+--------------------------+ +| offset into packet payload | 2 bytes of flexible data | ++----------------------------+--------------------------+ + +For example, + +:: + + ... user-def 0x4FFFF ... + +tells the filter to look 4 bytes into the payload and match that value against +0xFFFF. The offset is based on the beginning of the payload, and not the +beginning of the packet. Thus + +:: + + flow-type tcp4 ... user-def 0x8BEAF ... + +would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the +TCP/IPv4 payload. + +Note that ICMP headers are parsed as 4 bytes of header and 4 bytes of payload. +Thus to match the first byte of the payload, you must actually add 4 bytes to +the offset. Also note that ip4 filters match both ICMP frames as well as raw +(unknown) ip4 frames, where the payload will be the L3 payload of the IP4 frame. + +The maximum offset is 64. The hardware will only read up to 64 bytes of data +from the payload. The offset must be even because the flexible data is 2 bytes +long and must be aligned to byte 0 of the packet payload. + +The user-defined flexible offset is also considered part of the input set and +cannot be programmed separately for multiple filters of the same type. 
However, +the flexible data is not part of the input set and multiple filters may use the +same offset but match against different data. + +To create filters that direct traffic to a specific Virtual Function, use the +"action" parameter. Specify the action as a 64-bit value, where the lower 32 +bits represent the queue number, while the next 8 bits represent which VF. +Note that 0 is the PF, so the VF identifier is offset by 1. For example:: + + ... action 0x800000002 ... + +specifies to direct traffic to Virtual Function 7 (8 minus 1) into queue 2 of +that VF. + +Note that these filters will not break internal routing rules, and will not +route traffic that otherwise would not have been sent to the specified Virtual +Function. + +Setting the link-down-on-close Private Flag +------------------------------------------- +When the link-down-on-close private flag is set to "on", the port's link will +go down when the interface is brought down using the ifconfig ethX down command. + +Use ethtool to view and set link-down-on-close, as follows:: + + ethtool --show-priv-flags ethX + ethtool --set-priv-flags ethX link-down-on-close [on|off] + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + dmesg -n 8 + +NOTE: This setting is not saved across reboots. + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <x> is the interface number:: + + ifconfig eth<x> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + ip link set mtu 9000 dev eth<x> + ip link set up dev eth<x> + +This setting is not saved across reboots. The setting change can be made +permanent by adding 'MTU=9000' to the file:: + + /etc/sysconfig/network-scripts/ifcfg-eth<x> // for RHEL + /etc/sysconfig/network/<config_file> // for SLES + +NOTE: The maximum MTU setting for Jumbo Frames is 9702. This value coincides +with the maximum Jumbo Frames size of 9728 bytes. + +NOTE: This driver will attempt to use multiple page sized buffers to receive +each jumbo packet. This should help to avoid buffer starvation issues when +allocating receive packets. + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: +https://www.kernel.org/pub/software/network/ethtool/ + +Supported ethtool Commands and Options for Filtering +---------------------------------------------------- +-n --show-nfc + Retrieves the receive network flow classification configurations. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 + Retrieves the hash options for the specified network traffic type. + +-N --config-nfc + Configures the receive network flow classification. + +rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r... + Configures the hash options for the specified network traffic type. + +udp4 UDP over IPv4 +udp6 UDP over IPv6 + +f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet. +n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet.
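+ +For example, a hypothetical invocation (eth0 is a placeholder) that hashes +UDP-over-IPv4 flows on the source and destination IP addresses plus both +halves of the Layer 4 header:: + + # ethtool -N eth0 rx-flow-hash udp4 sdfn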
+ +Speed and Duplex Configuration +------------------------------ +In addressing speed and duplex configuration issues, you need to distinguish +between copper-based adapters and fiber-based adapters. + +In the default mode, an Intel(R) Ethernet Network Adapter using copper +connections will attempt to auto-negotiate with its link partner to determine +the best setting. If the adapter cannot establish link with the link partner +using auto-negotiation, you may need to manually configure the adapter and link +partner to identical settings to establish link and pass packets. This should +only be needed when attempting to link with an older switch that does not +support auto-negotiation or one that has been forced to a specific speed or +duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds +and higher cannot be forced. Use the autonegotiation advertising setting to +manually set devices for 1 Gbps and higher. + +NOTE: You cannot set the speed for Intel(R) Ethernet Network Adapter XXV710 +based devices. + +Speed, duplex, and autonegotiation advertising are configured through the +ethtool* utility. + +Caution: Only experienced network administrators should force speed and duplex +or change autonegotiation advertising manually. The settings at the switch must +always match the adapter settings. Adapter performance may suffer or your +adapter may not operate if you configure the adapter differently from your +switch. + +An Intel(R) Ethernet Network Adapter using fiber-based connections, however, +will not attempt to auto-negotiate with its link partner since those adapters +operate only in full duplex and only at their native speed. + +NAPI +---- +NAPI (Rx polling mode) is supported in the i40e driver. +For more information on NAPI, see +https://wiki.linuxfoundation.org/networking/napi + +Flow Control +------------ +Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable +receiving and transmitting pause frames for i40e. When transmit is enabled, +pause frames are generated when the receive packet buffer crosses a predefined +threshold. When receive is enabled, the transmit unit will halt for the time +delay specified when a pause frame is received. + +NOTE: You must have a flow control capable link partner. + +Flow Control is on by default. + +Use ethtool to change the flow control settings. + +To enable or disable Rx or Tx Flow Control:: + + ethtool -A eth? rx <on|off> tx <on|off> + +Note: This command only enables or disables Flow Control if auto-negotiation is +disabled. If auto-negotiation is enabled, this command changes the parameters +used for auto-negotiation with the link partner. + +To enable or disable auto-negotiation:: + + ethtool -s eth? autoneg <on|off> + +Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending +on your device, you may not be able to change the auto-negotiation setting. + +RSS Hash Flow +------------- +Allows you to set the hash bytes per flow type and any combination of one or +more options for Receive Side Scaling (RSS) hash byte configuration. + +:: + + # ethtool -N <dev> rx-flow-hash <type> <option> + +Where <type> is: + tcp4 signifying TCP over IPv4 + udp4 signifying UDP over IPv4 + tcp6 signifying TCP over IPv6 + udp6 signifying UDP over IPv6 +And <option> is one or more of: + s Hash on the IP source address of the Rx packet. + d Hash on the IP destination address of the Rx packet. + f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet.
+ n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet. + +MAC and VLAN anti-spoofing feature +---------------------------------- +When a malicious driver attempts to send a spoofed packet, it is dropped by the +hardware and not transmitted. +NOTE: This feature can be disabled for a specific Virtual Function (VF):: + + ip link set <pf dev> vf <vf id> spoofchk {off|on} + +IEEE 1588 Precision Time Protocol (PTP) Hardware Clock (PHC) +------------------------------------------------------------ +Precision Time Protocol (PTP) is used to synchronize clocks in a computer +network. PTP support varies among Intel devices that support this driver. Use +"ethtool -T <netdev name>" to get a definitive list of PTP capabilities +supported by the device. + +IEEE 802.1ad (QinQ) Support +--------------------------- +The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN +IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as +"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks +allow L2 tunneling and the ability to segregate traffic within a particular +VLAN ID, among other uses. + +The following are examples of how to configure 802.1ad (QinQ):: + + ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24 + ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371 + +Where "24" and "371" are example VLAN IDs. + +NOTES: + Receive checksum offloads, cloud filters, and VLAN acceleration are not + supported for 802.1ad (QinQ) packets. + +VXLAN and GENEVE Overlay HW Offloading +-------------------------------------- +Virtual Extensible LAN (VXLAN) allows you to extend an L2 network over an L3 +network, which may be useful in a virtualized or cloud environment. Some +Intel(R) Ethernet Network devices perform VXLAN processing, offloading it from +the operating system. This reduces CPU utilization. + +VXLAN offloading is controlled by the Tx and Rx checksum offload options +provided by ethtool. That is, if Tx checksum offload is enabled, and the +adapter has the capability, VXLAN offloading is also enabled. + +Support for VXLAN and GENEVE HW offloading is dependent on kernel support of +the HW offloading features. + +Multiple Functions per Port +--------------------------- +Some adapters based on the Intel Ethernet Controller X710/XL710 support +multiple functions on a single physical port. Configure these functions through +the System Setup/BIOS. + +Minimum TX Bandwidth is the guaranteed minimum data transmission bandwidth, as +a percentage of the full physical port link speed, that the partition will +receive. The bandwidth the partition is awarded will never fall below the level +you specify. + +The range for the minimum bandwidth values is: +1 to ((100 minus # of partitions on the physical port) plus 1) +For example, if a physical port has 4 partitions, the range would be: +1 to ((100 - 4) + 1 = 97) + +The Maximum Bandwidth percentage represents the maximum transmit bandwidth +allocated to the partition as a percentage of the full physical port link +speed. The accepted range of values is 1-100. The value is used as a limiter, +should you choose that any one particular function not be able to consume 100% +of a port's bandwidth (should it be available). The sum of all the values for +Maximum Bandwidth is not restricted, because no more than 100% of a port's +bandwidth can ever be used. + +NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions +per Port (MFP) and SR-IOV are enabled.
+
+Multiple Functions per Port
+---------------------------
+Some adapters based on the Intel Ethernet Controller X710/XL710 support
+multiple functions on a single physical port. Configure these functions through
+the System Setup/BIOS.
+
+Minimum TX Bandwidth is the guaranteed minimum data transmission bandwidth, as
+a percentage of the full physical port link speed, that the partition will
+receive. The bandwidth the partition is awarded will never fall below the level
+you specify.
+
+The range for the minimum bandwidth values is:
+1 to ((100 minus # of partitions on the physical port) plus 1)
+For example, if a physical port has 4 partitions, the range would be:
+1 to ((100 - 4) + 1 = 97)
+
+The Maximum Bandwidth percentage represents the maximum transmit bandwidth
+allocated to the partition as a percentage of the full physical port link
+speed. The accepted range of values is 1-100. The value acts as a limiter: set
+it if you want to prevent a particular function from consuming 100% of a
+port's bandwidth (should that bandwidth be available). The sum of all the
+values for Maximum Bandwidth is not restricted, because no more than 100% of a
+port's bandwidth can ever be used.
+
+NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions
+per Port (MFP) and SR-IOV are enabled. An error from i40e is logged that says
+"add vsi failed for VF N, aq_err 16". To work around the issue, enable fewer
+than 64 virtual functions (VFs).
+
+Data Center Bridging (DCB)
+--------------------------
+DCB is a Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+Adapter firmware implements LLDP and DCBX protocol agents as per 802.1AB and
+802.1Qaz respectively. The firmware-based DCBX agent runs in willing mode only
+and can accept settings from a DCBX capable peer. Software configuration of
+DCBX parameters via dcbtool/lldptool is not supported.
+
+NOTE: Firmware LLDP can be disabled by setting the private flag disable-fw-lldp.
+
+The i40e driver implements the DCB netlink interface layer to allow user-space
+to communicate with the driver and query DCB configuration for the port.
+
+NOTE:
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+Interrupt Rate Limiting
+-----------------------
+:Valid Range: 0-235 (0=no limit)
+
+The Intel(R) Ethernet Controller XL710 family supports an interrupt rate
+limiting mechanism. The user can control, via ethtool, the number of
+microseconds between interrupts.
+
+Syntax::
+
+  # ethtool -C ethX rx-usecs-high N
+
+The range of 0-235 microseconds provides an effective range of 4,310 to 250,000
+interrupts per second. The value of rx-usecs-high can be set independently of
+rx-usecs and tx-usecs in the same ethtool command, and is also independent of
+the adaptive interrupt moderation algorithm. The underlying hardware supports
+granularity in 4-microsecond intervals, so adjacent values may result in the
+same interrupt rate.
+
+One possible use case is the following::
+
+  # ethtool -C ethX adaptive-rx off adaptive-tx off rx-usecs-high 20 rx-usecs \
+    5 tx-usecs 5
+
+The above command would disable adaptive interrupt moderation, and allow a
+maximum of 5 microseconds before indicating a receive or transmit was complete.
+However, instead of resulting in as many as 200,000 interrupts per second, it
+limits total interrupts per second to 50,000 via the rx-usecs-high parameter.
+
+Performance Optimization
+========================
+Driver defaults are meant to fit a wide variety of workloads, but if further
+optimization is required we recommend experimenting with the following settings.
+
+NOTE: For better performance when processing small (64B) frame sizes, try
+enabling hyper-threading in the BIOS in order to increase the number of logical
+cores in the system and subsequently increase the number of queues available to
+the adapter.
+
+Virtualized Environments
+------------------------
+1. Disable XPS on both ends by using the included virt_perf_default script
+or by running the following command as root::
+
+  for file in `ls /sys/class/net/<ethX>/queues/tx-*/xps_cpus`;
+  do echo 0 > $file; done
+
+2. Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to
+individual logical CPUs, making sure to use a set of CPUs included in the
+device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist.
+
+3. Configure as many Rx/Tx queues in the VM as available. Do not rely on
+the default setting of 1.
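+
+For example, to expose four queue pairs inside the VM (a sketch; assumes the
+VF netdev is eth0 and that the VF driver supports channel configuration)::
+
+  # ethtool -L eth0 combined 4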
+
+
+Non-virtualized Environments
+----------------------------
+Pin the adapter's IRQs to specific cores by disabling the irqbalance service
+and using the included set_irq_affinity script. Please see the script's help
+text for further options.
+
+- The following settings will distribute the IRQs across all the cores evenly::
+
+  # scripts/set_irq_affinity -x all <interface1> [ <interface2> ... ]
+
+- The following settings will distribute the IRQs across all the cores that are
+  local to the adapter (same NUMA node)::
+
+  # scripts/set_irq_affinity -x local <interface1> [ <interface2> ... ]
+
+For very CPU-intensive workloads, we recommend pinning the IRQs to all cores.
+
+For IP Forwarding: Disable Adaptive ITR and lower Rx and Tx interrupts per
+queue using ethtool.
+
+- Setting rx-usecs and tx-usecs to 125 will limit interrupts to about 8000
+  interrupts per second per queue.
+
+::
+
+  # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 125 \
+    tx-usecs 125
+
+For lower CPU utilization: Disable Adaptive ITR and lower Rx and Tx interrupts
+per queue using ethtool.
+
+- Setting rx-usecs and tx-usecs to 250 will limit interrupts to about 4000
+  interrupts per second per queue.
+
+::
+
+  # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 250 \
+    tx-usecs 250
+
+For lower latency: Disable Adaptive ITR and ITR by setting Rx and Tx to 0 using
+ethtool.
+
+::
+
+  # ethtool -C <interface> adaptive-rx off adaptive-tx off rx-usecs 0 \
+    tx-usecs 0
+
+Application Device Queues (ADq)
+-------------------------------
+Application Device Queues (ADq) allows you to dedicate one or more queues to a
+specific application. This can reduce latency for the specified application,
+and allow Tx traffic to be rate limited per application. Follow the steps below
+to set ADq.
+
+1. Create traffic classes (TCs). A maximum of 8 TCs can be created per
+interface. The shaper bw_rlimit parameter is optional.
+
+Example: Sets up two tcs, tc0 and tc1, with 16 queues each and max tx rate set
+to 1Gbit for tc0 and 3Gbit for tc1.
+
+::
+
+  # tc qdisc add dev <interface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1
+    queues 16@0 16@16 hw 1 mode channel shaper bw_rlimit min_rate 1Gbit 2Gbit
+    max_rate 1Gbit 3Gbit
+
+map: priority mapping for up to 16 priorities to tcs (e.g. map 0 0 0 0 1 1 1 1
+sets priorities 0-3 to use tc0 and 4-7 to use tc1)
+
+queues: for each tc, <num queues>@<offset> (e.g. queues 16@0 16@16 assigns
+16 queues to tc0 at offset 0 and 16 queues to tc1 at offset 16. Max total
+number of queues for all tcs is 64 or number of cores, whichever is lower.)
+
+hw 1 mode channel: 'channel' with 'hw' set to 1 is a new hardware
+offload mode in mqprio that makes full use of the mqprio options, the
+TCs, the queue configurations, and the QoS parameters.
+
+shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
+Totals must be equal to or less than port speed.
+
+For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
+monitoring tools such as ifstat or sar -n DEV [interval] [number of samples]
+
+2. Enable HW TC offload on interface::
+
+  # ethtool -K <interface> hw-tc-offload on
+
+3. Apply TCs to ingress (RX) flow of interface::
+
+  # tc qdisc add dev <interface> ingress
+
+NOTES:
+  - Run all tc commands from the iproute2 <pathtoiproute2>/tc/ directory.
+  - ADq is not compatible with cloud filters.
+  - Setting up channels via ethtool (ethtool -L) is not supported when the
+    TCs are configured using mqprio.
+  - You must have the latest version of iproute2.
+  - NVM version 6.01 or later is required.
+  - ADq cannot be enabled when any of the following features are enabled: Data
+    Center Bridging (DCB), Multiple Functions per Port (MFP), or Sideband
+    Filters.
+  - If another driver (for example, DPDK) has set cloud filters, you cannot
+    enable ADq.
+  - Tunnel filters are not supported in ADq. If encapsulated packets do
+    arrive in non-tunnel mode, filtering will be done on the inner headers.
+    For example, for VXLAN traffic in non-tunnel mode, PCTYPE is identified
+    as a VXLAN encapsulated packet, outer headers are ignored. Therefore,
+    inner headers are matched.
+  - If a TC filter on a PF matches traffic over a VF (on the PF), that
+    traffic will be routed to the appropriate queue of the PF, and will
+    not be passed on to the VF. Such traffic will end up getting dropped higher
+    up in the TCP/IP stack as it does not match PF address data.
+  - If traffic matches multiple TC filters that point to different TCs,
+    that traffic will be duplicated and sent to all matching TC queues.
+    The hardware switch mirrors the packet to a VSI list when multiple
+    filters are matched.
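+
+Once the TCs are configured, traffic can be steered to them with TC filters.
+As a sketch (the IP address, port, and interface are placeholders), the
+following directs inbound TCP port 80 traffic for 192.168.1.1 to tc1's queues::
+
+  # tc filter add dev <interface> protocol ip parent ffff: prio 1 flower \
+    dst_ip 192.168.1.1/32 ip_proto tcp dst_port 80 skip_sw hw_tc 1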
+
+
+Known Issues/Troubleshooting
+============================
+
+NOTE: 1 Gb devices based on the Intel(R) Ethernet Network Connection X722 do
+not support the following features:
+
+  * Data Center Bridging (DCB)
+  * QoS
+  * VMQ
+  * SR-IOV
+  * Task Encapsulation offload (VXLAN, NVGRE)
+  * Energy Efficient Ethernet (EEE)
+  * Auto-media detect
+
+Unexpected Issues when the device driver and DPDK share a device
+----------------------------------------------------------------
+Unexpected issues may result when an i40e device is in multi driver mode and
+the kernel driver and DPDK driver are sharing the device. This is because
+access to the global NIC resources is not synchronized between multiple
+drivers. Any change to the global NIC configuration (writing to a global
+register, setting global configuration by AQ, or changing switch modes) will
+affect all ports and drivers on the device. Loading DPDK with the
+"multi-driver" module parameter may mitigate some of the issues.
+
+TC0 must be enabled when setting up DCB on a switch
+---------------------------------------------------
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt
deleted file mode 100644
index c2d6e1824b29..000000000000
--- a/Documentation/networking/i40e.txt
+++ /dev/null
@@ -1,190 +0,0 @@
-Linux Base Driver for the Intel(R) Ethernet Controller XL710 Family
-===================================================================
-
-Intel i40e Linux driver.
-Copyright(c) 2013 Intel Corporation.
- -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Performance Tuning -- Known Issues -- Support - - -Identifying Your Adapter -======================== - -The driver in this release is compatible with the Intel Ethernet -Controller XL710 Family. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/sb/CS-012904.htm - - -Enabling the driver -=================== - -The driver is enabled via the standard kernel configuration system, -using the make command: - - make config/oldconfig/menuconfig/etc. - -The driver is located in the menu structure at: - - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet driver support - -> Intel devices - -> Intel(R) Ethernet Controller XL710 Family - -Additional Configurations -========================= - - Generic Receive Offload (GRO) - ----------------------------- - The driver supports the in-kernel software implementation of GRO. GRO has - shown that by coalescing Rx traffic into larger chunks of data, CPU - utilization can be significantly reduced when under large Rx load. GRO is - an evolution of the previously-used LRO interface. GRO is able to coalesce - other protocols besides TCP. It's also safe to use with configurations that - are problematic for LRO, namely bridging and iSCSI. - - Ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The latest - ethtool version is required for this functionality. - - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool - - - Flow Director n-ntuple traffic filters (FDir) - --------------------------------------------- - The driver utilizes the ethtool interface for configuring ntuple filters, - via "ethtool -N <device> <filter>". - - The sctp4, ip4, udp4, and tcp4 flow types are supported with the standard - fields including src-ip, dst-ip, src-port and dst-port. The driver only - supports fully enabling or fully masking the fields, so use of the mask - fields for partial matches is not supported. - - Additionally, the driver supports using the action to specify filters for a - Virtual Function. You can specify the action as a 64bit value, where the - lower 32 bits represents the queue number, while the next 8 bits represent - which VF. Note that 0 is the PF, so the VF identifier is offset by 1. For - example: - - ... action 0x800000002 ... - - Would indicate to direct traffic for Virtual Function 7 (8 minus 1) on queue - 2 of that VF. - - The driver also supports using the user-defined field to specify 2 bytes of - arbitrary data to match within the packet payload in addition to the regular - fields. The data is specified in the lower 32bits of the user-def field in - the following way: - - +----------------------------+---------------------------+ - | 31 28 24 20 16 | 15 12 8 4 0| - +----------------------------+---------------------------+ - | offset into packet payload | 2 bytes of flexible data | - +----------------------------+---------------------------+ - - As an example, - - ... user-def 0x4FFFF .... - - means to match the value 0xFFFF 4 bytes into the packet payload. Note that - the offset is based on the beginning of the payload, and not the beginning - of the packet. Thus - - flow-type tcp4 ... user-def 0x8BEAF .... 
- - would match TCP/IPv4 packets which have the value 0xBEAF 8bytes into the - TCP/IPv4 payload. - - For ICMP, the hardware parses the ICMP header as 4 bytes of header and 4 - bytes of payload, so if you want to match an ICMP frames payload you may need - to add 4 to the offset in order to match the data. - - Furthermore, the offset can only be up to a value of 64, as the hardware - will only read up to 64 bytes of data from the payload. It must also be even - as the flexible data is 2 bytes long and must be aligned to byte 0 of the - packet payload. - - When programming filters, the hardware is limited to using a single input - set for each flow type. This means that it is an error to program two - different filters with the same type that don't match on the same fields. - Thus the second of the following two commands will fail: - - ethtool -N <device> flow-type tcp4 src-ip 192.168.0.7 action 5 - ethtool -N <device> flow-type tcp4 dst-ip 192.168.15.18 action 1 - - This is because the first filter will be accepted and reprogram the input - set for TCPv4 filters, but the second filter will be unable to reprogram the - input set until all the conflicting TCPv4 filters are first removed. - - Note that the user-defined flexible offset is also considered part of the - input set and cannot be programmed separately for multiple filters of the - same type. However, the flexible data is not part of the input set and - multiple filters may use the same offset but match against different data. - - Data Center Bridging (DCB) - -------------------------- - DCB configuration is not currently supported. - - FCoE - ---- - The driver supports Fiber Channel over Ethernet (FCoE) and Data Center - Bridging (DCB) functionality. Configuring DCB and FCoE is outside the scope - of this driver doc. Refer to http://www.open-fcoe.org/ for FCoE project - information and http://www.open-lldp.org/ or email list - e1000-eedc@lists.sourceforge.net for DCB information. - - MAC and VLAN anti-spoofing feature - ---------------------------------- - When a malicious driver attempts to send a spoofed packet, it is dropped by - the hardware and not transmitted. An interrupt is sent to the PF driver - notifying it of the spoof attempt. - - When a spoofed packet is detected the PF driver will send the following - message to the system log (displayed by the "dmesg" command): - - Spoof event(s) detected on VF (n) - - Where n=the VF that attempted to do the spoofing. - - -Performance Tuning -================== - -An excellent article on performance tuning can be found at: - -http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf - - -Known Issues -============ - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://e1000.sourceforge.net - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sourceforge.net and copy -netdev@vger.kernel.org. diff --git a/Documentation/networking/iavf.rst b/Documentation/networking/iavf.rst new file mode 100644 index 000000000000..f8b42b64eb28 --- /dev/null +++ b/Documentation/networking/iavf.rst @@ -0,0 +1,281 @@ +.. 
SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function +================================================================== + +Intel Ethernet Adaptive Virtual Function Linux driver. +Copyright(c) 2013-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Additional Configurations +- Known Issues/Troubleshooting +- Support + +This file describes the iavf Linux* Base Driver. This driver was formerly +called i40evf. + +The iavf driver supports the below mentioned virtual function devices and +can only be activated on kernels running the i40e or newer Physical Function +(PF) driver compiled with CONFIG_PCI_IOV. The iavf driver requires +CONFIG_PCI_MSI to be enabled. + +The guest OS loading the iavf driver must support MSI-X interrupts. + +Identifying Your Adapter +======================== +The driver in this kernel is compatible with devices based on the following: + * Intel(R) XL710 X710 Virtual Function + * Intel(R) X722 Virtual Function + * Intel(R) XXV710 Virtual Function + * Intel(R) Ethernet Adaptive Virtual Function + +For the best performance, make sure the latest NVM/FW is installed on your +device. + +For information on how to identify your adapter, and for the latest NVM/FW +images and Intel network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + dmesg -n 8 + +NOTE: This setting is not saved across reboots. + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: +https://www.kernel.org/pub/software/network/ethtool/ + +Setting VLAN Tag Stripping +-------------------------- +If you have applications that require Virtual Functions (VFs) to receive +packets with VLAN tags, you can disable VLAN tag stripping for the VF. The +Physical Function (PF) processes requests issued from the VF to enable or +disable VLAN tag stripping. Note that if the PF has assigned a VLAN to a VF, +then requests from that VF to set VLAN tag stripping will be ignored. + +To enable/disable VLAN tag stripping for a VF, issue the following command +from inside the VM in which you are running the VF:: + + ethtool -K <if_name> rxvlan on/off + +or alternatively:: + + ethtool --offload <if_name> rxvlan on/off + +Adaptive Virtual Function +------------------------- +Adaptive Virtual Function (AVF) allows the virtual function driver, or VF, to +adapt to changing feature sets of the physical function driver (PF) with which +it is associated. This allows system administrators to update a PF without +having to update all the VFs associated with it. All AVFs have a single common +device ID and branding string. + +AVFs have a minimum set of features known as "base mode," but may provide +additional features depending on what features are available in the PF with +which the AVF is associated. The following are base mode features: + +- 4 Queue Pairs (QP) and associated Configuration Status Registers (CSRs) + for Tx/Rx. +- i40e descriptors and ring format. +- Descriptor write-back completion. 
+- 1 control queue, with i40e descriptors, CSRs and ring format.
+- 5 MSI-X interrupt vectors and corresponding i40e CSRs.
+- 1 Interrupt Throttle Rate (ITR) index.
+- 1 Virtual Station Interface (VSI) per VF.
+- 1 Traffic Class (TC), TC0
+- Receive Side Scaling (RSS) with 64 entry indirection table and key,
+  configured through the PF.
+- 1 unicast MAC address reserved per VF.
+- 16 MAC address filters for each VF.
+- Stateless offloads - non-tunneled checksums.
+- AVF device ID.
+- HW mailbox is used for VF to PF communications (including on Windows).
+
+IEEE 802.1ad (QinQ) Support
+---------------------------
+The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
+IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
+"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks
+allow L2 tunneling and the ability to segregate traffic within a particular
+VLAN ID, among other uses.
+
+The following are examples of how to configure 802.1ad (QinQ)::
+
+  ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24
+  ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371
+
+Where "24" and "371" are example VLAN IDs.
+
+NOTES:
+  Receive checksum offloads, cloud filters, and VLAN acceleration are not
+  supported for 802.1ad (QinQ) packets.
+
+Application Device Queues (ADq)
+-------------------------------
+Application Device Queues (ADq) allows you to dedicate one or more queues to a
+specific application. This can reduce latency for the specified application,
+and allow Tx traffic to be rate limited per application. Follow the steps below
+to set ADq.
+
+1. Create traffic classes (TCs). A maximum of 8 TCs can be created per
+interface. The shaper bw_rlimit parameter is optional.
+
+Example: Sets up two tcs, tc0 and tc1, with 16 queues each and max tx rate set
+to 1Gbit for tc0 and 3Gbit for tc1.
+
+::
+
+  # tc qdisc add dev <interface> root mqprio num_tc 2 map 0 0 0 0 1 1 1 1
+    queues 16@0 16@16 hw 1 mode channel shaper bw_rlimit min_rate 1Gbit 2Gbit
+    max_rate 1Gbit 3Gbit
+
+map: priority mapping for up to 16 priorities to tcs (e.g. map 0 0 0 0 1 1 1 1
+sets priorities 0-3 to use tc0 and 4-7 to use tc1)
+
+queues: for each tc, <num queues>@<offset> (e.g. queues 16@0 16@16 assigns
+16 queues to tc0 at offset 0 and 16 queues to tc1 at offset 16. Max total
+number of queues for all tcs is 64 or number of cores, whichever is lower.)
+
+hw 1 mode channel: 'channel' with 'hw' set to 1 is a new hardware
+offload mode in mqprio that makes full use of the mqprio options, the
+TCs, the queue configurations, and the QoS parameters.
+
+shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
+Totals must be equal to or less than port speed.
+
+For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
+monitoring tools such as ifstat or sar -n DEV [interval] [number of samples]
+
+2. Enable HW TC offload on interface::
+
+  # ethtool -K <interface> hw-tc-offload on
+
+3. Apply TCs to ingress (RX) flow of interface::
+
+  # tc qdisc add dev <interface> ingress
+
+NOTES:
+  - Run all tc commands from the iproute2 <pathtoiproute2>/tc/ directory.
+  - ADq is not compatible with cloud filters.
+  - Setting up channels via ethtool (ethtool -L) is not supported when the TCs
+    are configured using mqprio.
+  - You must have the latest version of iproute2.
+  - NVM version 6.01 or later is required.
+  - ADq cannot be enabled when any of the following features are enabled: Data
+    Center Bridging (DCB), Multiple Functions per Port (MFP), or Sideband Filters.
+  - If another driver (for example, DPDK) has set cloud filters, you cannot
+    enable ADq.
+  - Tunnel filters are not supported in ADq. If encapsulated packets do arrive
+    in non-tunnel mode, filtering will be done on the inner headers. For example,
+    for VXLAN traffic in non-tunnel mode, PCTYPE is identified as a VXLAN
+    encapsulated packet, outer headers are ignored. Therefore, inner headers are
+    matched.
+  - If a TC filter on a PF matches traffic over a VF (on the PF), that traffic
+    will be routed to the appropriate queue of the PF, and will not be passed on
+    to the VF. Such traffic will end up getting dropped higher up in the TCP/IP
+    stack as it does not match PF address data.
+  - If traffic matches multiple TC filters that point to different TCs, that
+    traffic will be duplicated and sent to all matching TC queues. The hardware
+    switch mirrors the packet to a VSI list when multiple filters are matched.
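+
+Once the TCs exist, traffic can be steered to them with TC filters. As a
+sketch (the IP address, port, and interface are placeholders), the following
+sends inbound TCP port 80 traffic for 192.168.1.1 to tc1's queues::
+
+  # tc filter add dev <interface> protocol ip parent ffff: prio 1 flower \
+    dst_ip 192.168.1.1/32 ip_proto tcp dst_port 80 skip_sw hw_tc 1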
+
+
+Known Issues/Troubleshooting
+============================
+
+Traffic Is Not Being Passed Between VM and Client
+-------------------------------------------------
+You may not be able to pass traffic between a client system and a
+Virtual Machine (VM) running on a separate host if the Virtual Function
+(VF, or Virtual NIC) is not in trusted mode and spoof checking is enabled
+on the VF. Note that this situation can occur in any combination of client,
+host, and guest operating system. For information on how to set the VF to
+trusted mode, refer to the section "VLAN Tag Packet Steering" in this
+readme document. For information on setting spoof checking, refer to the
+section "MAC and VLAN anti-spoofing feature" in this readme document.
+
+Do not unload port driver if VF with active VM is bound to it
+-------------------------------------------------------------
+Do not unload a port's driver if a Virtual Function (VF) with an active Virtual
+Machine (VM) is bound to it. Doing so will cause the port to appear to hang.
+Once the VM shuts down, or otherwise releases the VF, the command will complete.
+
+Virtual machine does not get link
+---------------------------------
+If the virtual machine has more than one virtual port assigned to it, and those
+virtual ports are bound to different physical ports, you may not get link on
+all of the virtual ports. The following command may work around the issue::
+
+  ethtool -r <PF>
+
+Where <PF> is the PF interface in the host, for example: p5p1. You may need to
+run the command more than once to get link on all virtual ports.
+
+MAC address of Virtual Function changes unexpectedly
+----------------------------------------------------
+If a Virtual Function's MAC address is not assigned in the host, then the VF
+(virtual function) driver will use a random MAC address. This random MAC
+address may change each time the VF driver is reloaded. You can assign a static
+MAC address in the host machine. This static MAC address will survive
+a VF driver reload.
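+
+A minimal sketch of assigning such an address from the host (the PF name, VF
+index, and MAC address are placeholders)::
+
+  # ip link set <PF> vf 0 mac 02:aa:bb:cc:dd:ee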
+
+Driver Buffer Overflow Fix
+--------------------------
+The fix to resolve CVE-2016-8105, referenced in Intel SA-00069
+https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00069.html
+is included in this and future versions of the driver.
+
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
+Due to the default ARP behavior on Linux, it is not possible to have one system
+on two IP networks in the same Ethernet broadcast domain (non-partitioned
+switch) behave as expected. All Ethernet interfaces will respond to IP traffic
+for any IP address assigned to the system. This results in unbalanced receive
+traffic.
+
+If you have multiple interfaces in a server, turn on ARP filtering by
+entering::
+
+  echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+NOTE: This setting is not saved across reboots. The configuration change can be
+made permanent by adding the following line to the file /etc/sysctl.conf::
+
+  net.ipv4.conf.all.arp_filter = 1
+
+Alternatively, you can install the interfaces in separate broadcast domains
+(either in different switches or in a switch partitioned to VLANs).
+
+Rx Page Allocation Errors
+-------------------------
+'Page allocation failure. order:0' errors may occur under stress.
+This is caused by the way the Linux kernel reports this stressed condition.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/iavf.txt b/Documentation/networking/iavf.txt
deleted file mode 100644
index cc902a2369d6..000000000000
--- a/Documentation/networking/iavf.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Linux* Base Driver for Intel(R) Network Connection
-==================================================
-
-Intel Ethernet Adaptive Virtual Function Linux driver.
-Copyright(c) 2013-2018 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Known Issues/Troubleshooting
-- Support
-
-This file describes the iavf Linux* Base Driver. This driver
-was formerly called i40evf.
-
-The iavf driver supports the below mentioned virtual function
-devices and can only be activated on kernels running the i40e or
-newer Physical Function (PF) driver compiled with CONFIG_PCI_IOV.
-The iavf driver requires CONFIG_PCI_MSI to be enabled.
-
-The guest OS loading the iavf driver must support MSI-X interrupts.
- -Supported Hardware -================== -Intel XL710 X710 Virtual Function -Intel X722 Virtual Function -Intel Ethernet Adaptive Virtual Function - -Identifying Your Adapter -======================== - -For more information on how to identify your adapter, go to the -Adapter & Driver ID Guide at: - - https://www.intel.com/content/www/us/en/support/articles/000005584/network-and-i-o/ethernet-products.html - - -Known Issues/Troubleshooting -============================ - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/ice.rst b/Documentation/networking/ice.rst new file mode 100644 index 000000000000..1e4948c9e989 --- /dev/null +++ b/Documentation/networking/ice.rst @@ -0,0 +1,45 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series +=================================================================== + +Intel ice Linux driver. +Copyright(c) 2018 Intel Corporation. + +Contents +======== + +- Enabling the driver +- Support + +The driver in this release supports Intel's E800 Series of products. For +more information, visit Intel's support page at https://support.intel.com. + +Enabling the driver +=================== +The driver is enabled via the standard kernel configuration system, +using the make command:: + + make oldconfig/silentoldconfig/menuconfig/etc. + +The driver is located in the menu structure at: + + -> Device Drivers + -> Network device support (NETDEVICES [=y]) + -> Ethernet driver support + -> Intel devices + -> Intel(R) Ethernet Connection E800 Series Support + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/ice.txt b/Documentation/networking/ice.txt deleted file mode 100644 index 6261c46378e1..000000000000 --- a/Documentation/networking/ice.txt +++ /dev/null @@ -1,39 +0,0 @@ -Intel(R) Ethernet Connection E800 Series Linux Driver -=================================================================== - -Intel ice Linux driver. -Copyright(c) 2018 Intel Corporation. - -Contents -======== -- Enabling the driver -- Support - -The driver in this release supports Intel's E800 Series of products. For -more information, visit Intel's support page at http://support.intel.com. - -Enabling the driver -=================== - -The driver is enabled via the standard kernel configuration system, -using the make command: - - Make oldconfig/silentoldconfig/menuconfig/etc. 
- -The driver is located in the menu structure at: - - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet driver support - -> Intel devices - -> Intel(R) Ethernet Connection E800 Series Support - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -If an issue is identified with the released source code, please email -the maintainer listed in the MAINTAINERS file. diff --git a/Documentation/networking/igb.rst b/Documentation/networking/igb.rst new file mode 100644 index 000000000000..ba16b86d5593 --- /dev/null +++ b/Documentation/networking/igb.rst @@ -0,0 +1,193 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for Intel(R) Ethernet Network Connection +=========================================================== + +Intel Gigabit Linux driver. +Copyright(c) 1999-2018 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Additional Configurations +- Support + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Command Line Parameters +======================== +If the driver is built as a module, the following optional parameters are used +by entering them on the command line with the modprobe command using this +syntax:: + + modprobe igb [<option>=<VAL1>,<VAL2>,...] + +There needs to be a <VAL#> for each network port in the system supported by +this driver. The values will be applied to each instance, in function order. +For example:: + + modprobe igb max_vfs=2,4 + +In this case, there are two network ports supported by igb in the system. + +NOTE: A descriptor describes a data buffer and attributes related to the data +buffer. This information is accessed by the hardware. + +max_vfs +------- +:Valid Range: 0-7 + +This parameter adds support for SR-IOV. It causes the driver to spawn up to +max_vfs worth of virtual functions. If the value is greater than 0 it will +also force the VMDq parameter to be 1 or more. + +The parameters for the driver are referenced by position. Thus, if you have a +dual port adapter, or more than one adapter in your system, and want N virtual +functions per port, you must specify a number for each port with each parameter +separated by a comma. For example:: + + modprobe igb max_vfs=4 + +This will spawn 4 VFs on the first port. + +:: + + modprobe igb max_vfs=2,4 + +This will spawn 2 VFs on the first port and 4 VFs on the second port. + +NOTE: Caution must be used in loading the driver with these parameters. +Depending on your system configuration, number of slots, etc., it is impossible +to predict in all cases where the positions would be on the command line. + +NOTE: Neither the device nor the driver control how VFs are mapped into config +space. Bus layout will vary by operating system. On operating systems that +support it, you can check sysfs to find the mapping. + +NOTE: When either SR-IOV mode or VMDq mode is enabled, hardware VLAN filtering +and VLAN tag stripping/insertion will remain enabled. Please remove the old +VLAN filter before the new VLAN filter is added. 
For example::
+
+  ip link set eth0 vf 0 vlan 100	// set vlan 100 for VF 0
+  ip link set eth0 vf 0 vlan 0	// Delete vlan 100
+  ip link set eth0 vf 0 vlan 200	// set a new vlan 200 for VF 0
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system logs.
+
+
+Additional Features and Configurations
+======================================
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+  ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+  ip link set mtu 9000 dev eth<x>
+  ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file:
+
+- For RHEL: /etc/sysconfig/network-scripts/ifcfg-eth<x>
+- For SLES: /etc/sysconfig/network/<config_file>
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9216. This value coincides
+with the maximum Jumbo Frames size of 9234 bytes.
+
+NOTE: Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+poor performance or loss of link.
+
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+
+https://www.kernel.org/pub/software/network/ethtool/
+
+
+Enabling Wake on LAN* (WoL)
+---------------------------
+WoL is configured through the ethtool* utility.
+
+WoL will be enabled on the system during the next shut down or reboot. For
+this driver version, in order to enable WoL, the igb driver must be loaded
+prior to shutting down or suspending the system.
+
+NOTE: Wake on LAN is only supported on port A of multi-port devices. Also
+Wake On LAN is not supported for the following device:
+- Intel(R) Gigabit VT Quad Port Server Adapter
+
+
+Multiqueue
+----------
+In this mode, a separate MSI-X vector is allocated for each queue and one for
+"other" interrupts such as link status change and errors. All interrupts are
+throttled via interrupt moderation. Interrupt moderation must be used to avoid
+interrupt storms while the driver is processing one interrupt. The moderation
+value should be at least as large as the expected time for the driver to
+process an interrupt. Multiqueue is off by default.
+
+REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not found,
+the system will fall back to MSI or to Legacy interrupts. This driver supports
+receive multiqueue on all kernels that support MSI-X.
+
+NOTE: On some kernels a reboot is required to switch between single queue mode
+and multiqueue mode or vice-versa.
+
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+
+An interrupt is sent to the PF driver notifying it of the spoof attempt.
When a +spoofed packet is detected, the PF driver will send the following message to +the system log (displayed by the "dmesg" command): +Spoof event(s) detected on VF(n), where n = the VF that attempted to do the +spoofing + + +Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool +------------------------------------------------------------ +You can set a MAC address of a Virtual Function (VF), a default VLAN and the +rate limit using the IProute2 tool. Download the latest version of the +IProute2 tool from Sourceforge if your version does not have all the features +you require. + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt deleted file mode 100644 index f90643ef39c9..000000000000 --- a/Documentation/networking/igb.txt +++ /dev/null @@ -1,129 +0,0 @@ -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Support - -Identifying Your Adapter -======================== - -This driver supports all 82575, 82576 and 82580-based Intel (R) gigabit network -connections. - -For specific information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -Command Line Parameters -======================= - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -max_vfs -------- -Valid Range: 0-7 -Default Value: 0 - -This parameter adds support for SR-IOV. It causes the driver to spawn up to -max_vfs worth of virtual function. - -Additional Configurations -========================= - - Jumbo Frames - ------------ - Jumbo Frames support is enabled by changing the MTU to a value larger than - the default of 1500. Use the ip command to increase the MTU size. - For example: - - ip link set dev eth<x> mtu 9000 - - This setting is not saved across reboots. - - Notes: - - - The maximum MTU setting for Jumbo Frames is 9216. This value coincides - with the maximum Jumbo Frames size of 9234 bytes. - - - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in - poor performance or loss of link. - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The latest - version of ethtool can be found at: - - https://www.kernel.org/pub/software/network/ethtool/ - - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is configured through the ethtool* utility. - - For instructions on enabling WoL with ethtool, refer to the ethtool man page. - - WoL will be enabled on the system during the next shut down or reboot. - For this driver version, in order to enable WoL, the igb driver must be - loaded when shutting down or rebooting the system. - - Wake On LAN is only supported on port A of multi-port adapters. 
-
-  Wake On LAN is not supported for the Intel(R) Gigabit VT Quad Port Server
-  Adapter.
-
-  Multiqueue
-  ----------
-  In this mode, a separate MSI-X vector is allocated for each queue and one
-  for "other" interrupts such as link status change and errors. All
-  interrupts are throttled via interrupt moderation. Interrupt moderation
-  must be used to avoid interrupt storms while the driver is processing one
-  interrupt. The moderation value should be at least as large as the expected
-  time for the driver to process an interrupt. Multiqueue is off by default.
-
-  REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not
-  found, the system will fallback to MSI or to Legacy interrupts.
-
-  MAC and VLAN anti-spoofing feature
-  ----------------------------------
-  When a malicious driver attempts to send a spoofed packet, it is dropped by
-  the hardware and not transmitted. An interrupt is sent to the PF driver
-  notifying it of the spoof attempt.
-
-  When a spoofed packet is detected the PF driver will send the following
-  message to the system log (displayed by the "dmesg" command):
-
-  Spoof event(s) detected on VF(n)
-
-  Where n=the VF that attempted to do the spoofing.
-
-  Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool
-  ------------------------------------------------------------
-  You can set a MAC address of a Virtual Function (VF), a default VLAN and the
-  rate limit using the IProute2 tool. Download the latest version of the
-  iproute2 tool from Sourceforge if your version does not have all the
-  features you require.
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
-    www.intel.com/support/
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-    http://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/igbvf.rst b/Documentation/networking/igbvf.rst
new file mode 100644
index 000000000000..a8a9ffa4f8d3
--- /dev/null
+++ b/Documentation/networking/igbvf.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet
+============================================================
+
+Intel Gigabit Virtual Function Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+- Identifying Your Adapter
+- Additional Configurations
+- Support
+
+This driver supports Intel 82576-based virtual function devices that can only
+be activated on kernels that support SR-IOV.
+
+SR-IOV requires the correct platform and OS support.
+
+The guest OS loading this driver must support MSI-X interrupts.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
+
+Driver information can be obtained using ethtool, lspci, and ifconfig.
+Instructions on updating ethtool can be found in the section Additional
+Configurations later in this document.
+
+NOTE: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
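+
+For instance, the name and version of the driver bound to an interface can be
+checked with ethtool's driver-info option (a sketch; eth0 is a placeholder)::
+
+  ethtool -i eth0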
+ + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: + +https://www.kernel.org/pub/software/network/ethtool/ + + +Support +======= +For general information, go to the Intel support website at: + +https://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: + +https://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/igbvf.txt b/Documentation/networking/igbvf.txt deleted file mode 100644 index bd404735fb46..000000000000 --- a/Documentation/networking/igbvf.txt +++ /dev/null @@ -1,80 +0,0 @@ -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Support - -This file describes the igbvf Linux* Base Driver for Intel Network Connection. - -The igbvf driver supports 82576-based virtual function devices that can only -be activated on kernels that support SR-IOV. SR-IOV requires the correct -platform and OS support. - -The igbvf driver requires the igb driver, version 2.0 or later. The igbvf -driver supports virtual functions generated by the igb driver with a max_vfs -value of 1 or greater. For more information on the max_vfs parameter refer -to the README included with the igb driver. - -The guest OS loading the igbvf driver must support MSI-X interrupts. - -This driver is only supported as a loadable module at this time. Intel is -not supplying patches against the kernel source to allow for static linking -of the driver. For questions related to hardware requirements, refer to the -documentation supplied with your Intel Gigabit adapter. All hardware -requirements listed apply to use with Linux. - -Instructions on updating ethtool can be found in the section "Additional -Configurations" later in this document. - -VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs. - -Identifying Your Adapter -======================== - -The igbvf driver supports 82576-based virtual function devices that can only -be activated on kernels that support SR-IOV. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -For the latest Intel network drivers for Linux, refer to the following -website. In the search field, enter your adapter name or type, or use the -networking link on the left to search for your adapter: - - http://downloadcenter.intel.com/scripts-df-external/Support_Intel.aspx - -Additional Configurations -========================= - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. 
The ethtool - version 3.0 or later is required for this functionality, although we - strongly recommend downloading the latest version at: - - https://www.kernel.org/pub/software/network/ethtool/ - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index fcd710f2cc7a..bd89dae8d578 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -14,6 +14,16 @@ Contents: dpaa2/index e100 e1000 + e1000e + fm10k + igb + igbvf + ixgb + ixgbe + ixgbevf + i40e + iavf + ice kapi z8530book msg_zerocopy diff --git a/Documentation/networking/ixgb.rst b/Documentation/networking/ixgb.rst new file mode 100644 index 000000000000..8bd80e27843d --- /dev/null +++ b/Documentation/networking/ixgb.rst @@ -0,0 +1,467 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection +===================================================================== + +October 1, 2018 + + +Contents +======== + +- In This Release +- Identifying Your Adapter +- Command Line Parameters +- Improving Performance +- Additional Configurations +- Known Issues/Troubleshooting +- Support + + + +In This Release +=============== + +This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R) +Network Connection. This driver includes support for Itanium(R)2-based +systems. + +For questions related to hardware requirements, refer to the documentation +supplied with your 10 Gigabit adapter. All hardware requirements listed apply +to use with Linux. + +The following features are available in this kernel: + - Native VLANs + - Channel Bonding (teaming) + - SNMP + +Channel Bonding documentation can be found in the Linux kernel source: +/Documentation/networking/bonding.txt + +The driver information previously displayed in the /proc filesystem is not +supported in this release. Alternatively, you can use ethtool (version 1.6 +or later), lspci, and iproute2 to obtain the same information. + +Instructions on updating ethtool can be found in the section "Additional +Configurations" later in this document. + + +Identifying Your Adapter +======================== + +The following Intel network adapters are compatible with the drivers in this +release: + ++------------+------------------------------+----------------------------------+ +| Controller | Adapter Name | Physical Layer | ++============+==============================+==================================+ +| 82597EX | Intel(R) PRO/10GbE LR/SR/CX4 | - 10G Base-LR (fiber) | +| | Server Adapters | - 10G Base-SR (fiber) | +| | | - 10G Base-CX4 (copper) | ++------------+------------------------------+----------------------------------+ + +For more information on how to identify your adapter, go to the Adapter & +Driver ID Guide at: + + https://support.intel.com + + +Command Line Parameters +======================= + +If the driver is built as a module, the following optional parameters are +used by entering them on the command line with the modprobe command using +this syntax:: + + modprobe ixgb [<option>=<VAL1>,<VAL2>,...] 
+
+For example, with two 10GbE PCI adapters, entering::
+
+  modprobe ixgb TxDescriptors=80,128
+
+loads the ixgb driver with 80 TX resources for the first adapter and 128 TX
+resources for the second adapter.
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+Copybreak
+---------
+:Valid Range: 0-XXXX
+:Default Value: 256
+
+  This is the maximum size of packet that is copied to a new buffer on
+  receive.
+
+Debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+  This parameter adjusts the level of debug messages displayed in the
+  system logs.
+
+FlowControl
+-----------
+:Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+:Default Value: 1 if no EEPROM, otherwise read from EEPROM
+
+  This parameter controls the automatic generation (Tx) of and response (Rx)
+  to Ethernet PAUSE frames. There are hardware bugs associated with enabling
+  Tx flow control, so beware.
+
+RxDescriptors
+-------------
+:Valid Range: 64-4096
+:Default Value: 1024
+
+  This value is the number of receive descriptors allocated by the driver.
+  Increasing this value allows the driver to buffer more incoming packets.
+  Each descriptor is 16 bytes. A receive buffer is also allocated for
+  each descriptor and can be either 2048, 4056, 8192, or 16384 bytes,
+  depending on the MTU setting. When the MTU size is 1500 or less, the
+  receive buffer size is 2048 bytes. When the MTU is greater than 1500 the
+  receive buffer size will be either 4056, 8192, or 16384 bytes. The
+  maximum MTU size is 16114.
+
+TxDescriptors
+-------------
+:Valid Range: 64-4096
+:Default Value: 256
+
+  This value is the number of transmit descriptors allocated by the driver.
+  Increasing this value allows the driver to queue more transmits. Each
+  descriptor is 16 bytes.
+
+RxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 72
+
+  This value delays the generation of receive interrupts in units of
+  0.8192 microseconds. Receive interrupt reduction can improve CPU
+  efficiency if properly tuned for specific network traffic. Increasing
+  this value adds extra latency to frame reception and can end up
+  decreasing the throughput of TCP traffic. If the system is reporting
+  dropped receives, this value may be set too high, causing the driver to
+  run out of available receive descriptors.
+
+TxIntDelay
+----------
+:Valid Range: 0-65535 (0=off)
+:Default Value: 32
+
+  This value delays the generation of transmit interrupts in units of
+  0.8192 microseconds. Transmit interrupt reduction can improve CPU
+  efficiency if properly tuned for specific network traffic. Increasing
+  this value adds extra latency to frame transmission and can end up
+  decreasing the throughput of TCP traffic. If this value is set too high,
+  it will cause the driver to run out of available transmit descriptors.
+
+XsumRX
+------
+:Valid Range: 0-1
+:Default Value: 1
+
+  A value of '1' indicates that the driver should enable IP checksum
+  offload for received packets (both UDP and TCP) to the adapter hardware.
+
+RxFCHighThresh
+--------------
+:Valid Range: 1,536-262,136 (0x600 - 0x3FFF8, 8 byte granularity)
+:Default Value: 196,608 (0x30000)
+
+  Receive Flow control high threshold (when we send a pause frame)
+
+RxFCLowThresh
+-------------
+:Valid Range: 64-262,136 (0x40 - 0x3FFF8, 8 byte granularity)
+:Default Value: 163,840 (0x28000)
+
+  Receive Flow control low threshold (when we send a resume frame)
+
+FCReqTimeout
+------------
+:Valid Range: 1-65535
+:Default Value: 65535
+
+  Flow control request timeout (how long to pause the link partner's tx)
+
+IntDelayEnable
+--------------
+:Valid Range: 0,1
+:Default Value: 1
+
+  Interrupt Delay, 0 disables transmit interrupt delay and 1 enables it.
+
+
+Improving Performance
+=====================
+
+With the 10 Gigabit server adapters, the default Linux configuration will
+very likely limit the total available throughput artificially. There is a set
+of configuration changes that, when applied together, will increase the ability
+of Linux to transmit and receive data. The following enhancements were
+originally acquired from settings published at http://www.spec.org/web99/ for
+various submitted results using Linux.
+
+NOTE:
+  These changes are only suggestions, and serve as a starting point for
+  tuning your network performance.
+
+The changes are made in three major ways, listed in order of greatest effect:
+
+- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
+  parameter.
+- Use sysctl to modify /proc parameters (essentially kernel tuning)
+- Use setpci to modify the MMRBC field in PCI-X configuration space to increase
+  transmit burst lengths on the bus.
+
+NOTE:
+  setpci modifies the adapter's configuration registers to allow it to read
+  up to 4k bytes at a time (for transmits). However, for some systems the
+  behavior after modifying this register may be undefined (possibly errors of
+  some kind). A power-cycle, hard reset or explicitly setting the e6 register
+  back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a
+  stable configuration.
+
+- COPY these lines and paste them into ixgb_perf.sh:
+
+::
+
+  #!/bin/bash
+  echo "configuring network performance, edit this file to change the interface
+  or device ID of 10GbE card"
+  # set mmrbc to 4k reads, modify only Intel 10GbE device IDs
+  # replace 1a48 with appropriate 10GbE device's ID installed on the system,
+  # if needed.
+  setpci -d 8086:1a48 e6.b=2e
+  # set the MTU (max transmission unit) - it requires your switch and clients
+  # to change as well.
+    # set the txqueuelen
+    # your ixgb adapter should be loaded as eth1 for this to work, change if needed
+    ip li set dev eth1 mtu 9000 txqueuelen 1000 up
+    # call the sysctl utility to modify /proc/sys entries
+    sysctl -p ./sysctl_ixgb.conf
+
+- COPY these lines and paste them into sysctl_ixgb.conf:
+
+::
+
+    # some of the defaults may be different for your kernel
+    # call this file with sysctl -p <this file>
+    # these are just suggested values that worked well to increase throughput in
+    # several network benchmark tests, your mileage may vary
+
+    ### IPV4 specific settings
+    # turn TCP timestamp support off, default 1, reduces CPU use
+    net.ipv4.tcp_timestamps = 0
+    # turn SACK support off, default on
+    # on systems with a VERY fast bus -> memory interface this is the big gainer
+    net.ipv4.tcp_sack = 0
+    # set min/default/max TCP read buffer, default 4096 87380 174760
+    net.ipv4.tcp_rmem = 10000000 10000000 10000000
+    # set min/pressure/max TCP write buffer, default 4096 16384 131072
+    net.ipv4.tcp_wmem = 10000000 10000000 10000000
+    # set min/pressure/max TCP buffer space, default 31744 32256 32768
+    net.ipv4.tcp_mem = 10000000 10000000 10000000
+
+    ### CORE settings (mostly for socket and UDP effect)
+    # set maximum receive socket buffer size, default 131071
+    net.core.rmem_max = 524287
+    # set maximum send socket buffer size, default 131071
+    net.core.wmem_max = 524287
+    # set default receive socket buffer size, default 65535
+    net.core.rmem_default = 524287
+    # set default send socket buffer size, default 65535
+    net.core.wmem_default = 524287
+    # set maximum amount of option memory buffers, default 10240
+    net.core.optmem_max = 524287
+    # set number of unprocessed input packets before kernel starts dropping them; default 300
+    net.core.netdev_max_backlog = 300000
+
+Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface
+your ixgb driver is using and/or replace '1a48' with the appropriate 10GbE
+device ID installed on the system.
+
+NOTE:
+  Unless these scripts are added to the boot process, these changes will
+  last only until the next system reboot.
+
+
+Resolving Slow UDP Traffic
+--------------------------
+If your server does not seem to be able to receive UDP traffic as fast as it
+can receive TCP traffic, it could be because Linux, by default, does not set
+the network stack buffers as large as they need to be to support high UDP
+transfer rates. One way to alleviate this problem is to allow more memory to
+be used by the IP stack to store incoming data.
+
+For instance, use the commands::
+
+    sysctl -w net.core.rmem_max=262143
+
+and::
+
+    sysctl -w net.core.rmem_default=262143
+
+to increase the read buffer memory max and default to 262143 (256k - 1) from
+defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables
+will increase the amount of memory used by the network stack for receives, and
+can be increased significantly more if necessary for your application.
+
+
+Additional Configurations
+=========================
+
+Configuring the Driver on Different Distributions
+-------------------------------------------------
+Configuring a network driver to load properly when the system is started is
+distribution dependent. Typically, the configuration process involves adding
+an alias line to /etc/modprobe.conf as well as editing other system startup
+scripts and/or configuration files. Many popular Linux distributions ship
+with tools to make these changes for you. 
To learn the proper way to
+configure a network device for your system, refer to your distribution
+documentation. If during this process you are asked for the driver or module
+name, the name for the Linux Base Driver for the Intel 10GbE Family of
+Adapters is ixgb.
+
+Viewing Link Messages
+---------------------
+Link messages will not be displayed to the console if the distribution is
+restricting system messages. In order to see network driver link messages on
+your console, set the console log level to eight by entering the following::
+
+    dmesg -n 8
+
+NOTE: This setting is not saved across reboots.
+
+Jumbo Frames
+------------
+The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+enabled by changing the MTU to a value larger than the default of 1500.
+The maximum value for the MTU is 16114. Use the ip command to
+increase the MTU size. For example::
+
+    ip li set dev ethx mtu 9000
+
+The maximum MTU setting for Jumbo Frames is 16114. This value coincides
+with the maximum Jumbo Frames size of 16128.
+
+Ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. Ethtool
+version 1.6 or later is required for this functionality.
+
+The latest release of ethtool can be found at
+https://www.kernel.org/pub/software/network/ethtool/
+
+NOTE:
+  Ethtool version 1.6 supports only a limited set of ethtool options.
+  Support for a more complete ethtool feature set can be enabled by
+  upgrading to the latest version.
+
+NAPI
+----
+NAPI (Rx polling mode) is supported in the ixgb driver.
+
+See https://wiki.linuxfoundation.org/networking/napi for more information on
+NAPI.
+
+
+Known Issues/Troubleshooting
+============================
+
+NOTE:
+  After installing the driver, if your Intel Network Connection is not
+  working, verify in the "In This Release" section of the readme that you have
+  installed the correct driver.
+
+Cable Interoperability Issue with Fujitsu XENPAK Module in SmartBits Chassis
+----------------------------------------------------------------------------
+Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4
+Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits
+chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni.
+The CRC errors may be received either by the Intel(R) PRO/10GbE CX4
+Server adapter or the SmartBits. If this situation occurs, using a different
+cable assembly may resolve the issue.
+
+Cable Interoperability Issues with HP Procurve 3400cl Switch Port
+-----------------------------------------------------------------
+Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server
+adapter is connected to an HP Procurve 3400cl switch port using short cables
+(1 m or shorter). If this situation occurs, using a longer cable may resolve
+the issue.
+
+Excessive CRC errors may be observed when using Fujitsu 24AWG cable assemblies
+that are 10 m or longer, or when using a Leoni 15 m/24AWG cable assembly. The
+CRC errors may be received either by the CX4 Server adapter or at the switch.
+If this situation occurs, using a different cable assembly may resolve the
+issue.
+
+Jumbo Frames System Requirement
+-------------------------------
+Memory allocation failures have been observed on Linux systems with 64 MB
+of RAM or less that are running Jumbo Frames. If you are using Jumbo
+Frames, your system may require more than the advertised minimum
+requirement of 64 MB of system memory. 
+
+Performance Degradation with Jumbo Frames
+-----------------------------------------
+Degradation in throughput performance may be observed in some Jumbo frames
+environments. If this is observed, increasing the application's socket buffer
+size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
+See the specific application manual and /usr/src/linux*/Documentation/
+networking/ip-sysctl.txt for more details.
+
+Allocating Rx Buffers when Using Jumbo Frames
+---------------------------------------------
+Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if
+the available memory is heavily fragmented. This issue may be seen with PCI-X
+adapters or with packet split disabled. This can be reduced or eliminated
+by changing the amount of available memory for receive buffer allocation, by
+increasing /proc/sys/vm/min_free_kbytes.
+
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
+Due to the default ARP behavior on Linux, it is not possible to have
+one system on two IP networks in the same Ethernet broadcast domain
+(non-partitioned switch) behave as expected. All Ethernet interfaces
+will respond to IP traffic for any IP address assigned to the system.
+This results in unbalanced receive traffic.
+
+If you have multiple interfaces in a server, do either of the following:
+
+  - Turn on ARP filtering by entering::
+
+      echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+  - Install the interfaces in separate broadcast domains - either in
+    different switches or in a switch partitioned to VLANs.
+
+UDP Stress Test Dropped Packet Issue
+--------------------------------------
+Under a small-packet UDP stress test with the 10GbE driver, the Linux system
+may drop UDP packets because the socket buffers are full. You may want to
+change the driver's Flow Control variables to the minimum value for
+controlling packet reception.
+
+Tx Hangs Possible Under Stress
+------------------------------
+Under stress conditions, if TX hangs occur, turning off TSO with
+"ethtool -K eth0 tso off" may resolve the problem.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt
deleted file mode 100644
index 09f71d71920a..000000000000
--- a/Documentation/networking/ixgb.txt
+++ /dev/null
@@ -1,433 +0,0 @@
-Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
-=====================================================================
-
-March 14, 2011
-
-
-Contents
-========
-
-- In This Release
-- Identifying Your Adapter
-- Building and Installation
-- Command Line Parameters
-- Improving Performance
-- Additional Configurations
-- Known Issues/Troubleshooting
-- Support
-
-
-
-In This Release
-===============
-
-This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R)
-Network Connection. This driver includes support for Itanium(R)2-based
-systems.
-
-For questions related to hardware requirements, refer to the documentation
-supplied with your 10 Gigabit adapter. All hardware requirements listed apply
-to use with Linux. 
- -The following features are available in this kernel: - - Native VLANs - - Channel Bonding (teaming) - - SNMP - -Channel Bonding documentation can be found in the Linux kernel source: -/Documentation/networking/bonding.txt - -The driver information previously displayed in the /proc filesystem is not -supported in this release. Alternatively, you can use ethtool (version 1.6 -or later), lspci, and iproute2 to obtain the same information. - -Instructions on updating ethtool can be found in the section "Additional -Configurations" later in this document. - - -Identifying Your Adapter -======================== - -The following Intel network adapters are compatible with the drivers in this -release: - -Controller Adapter Name Physical Layer ----------- ------------ -------------- -82597EX Intel(R) PRO/10GbE LR/SR/CX4 10G Base-LR (1310 nm optical fiber) - Server Adapters 10G Base-SR (850 nm optical fiber) - 10G Base-CX4(twin-axial copper cabling) - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/sb/CS-012904.htm - - -Building and Installation -========================= - -select m for "Intel(R) PRO/10GbE support" located at: - Location: - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet (10000 Mbit) (NETDEV_10000 [=y]) -1. make modules && make modules_install - -2. Load the module: - -    modprobe ixgb <parameter>=<value> - - The insmod command can be used if the full - path to the driver module is specified. For example: - - insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgb/ixgb.ko - - With 2.6 based kernels also make sure that older ixgb drivers are - removed from the kernel, before loading the new module: - - rmmod ixgb; modprobe ixgb - -3. Assign an IP address to the interface by entering the following, where - x is the interface number: - - ip addr add ethx <IP_address> - -4. Verify that the interface works. Enter the following, where <IP_address> - is the IP address for another machine on the same subnet as the interface - that is being tested: - - ping <IP_address> - - -Command Line Parameters -======================= - -If the driver is built as a module, the following optional parameters are -used by entering them on the command line with the modprobe command using -this syntax: - - modprobe ixgb [<option>=<VAL1>,<VAL2>,...] - -For example, with two 10GbE PCI adapters, entering: - - modprobe ixgb TxDescriptors=80,128 - -loads the ixgb driver with 80 TX resources for the first adapter and 128 TX -resources for the second adapter. - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -FlowControl -Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) -Default: Read from the EEPROM - If EEPROM is not detected, default is 1 - This parameter controls the automatic generation(Tx) and response(Rx) to - Ethernet PAUSE frames. There are hardware bugs associated with enabling - Tx flow control so beware. - -RxDescriptors -Valid Range: 64-512 -Default Value: 512 - This value is the number of receive descriptors allocated by the driver. - Increasing this value allows the driver to buffer more incoming packets. - Each descriptor is 16 bytes. A receive buffer is also allocated for - each descriptor and can be either 2048, 4056, 8192, or 16384 bytes, - depending on the MTU setting. When the MTU size is 1500 or less, the - receive buffer size is 2048 bytes. 
When the MTU is greater than 1500 the - receive buffer size will be either 4056, 8192, or 16384 bytes. The - maximum MTU size is 16114. - -RxIntDelay -Valid Range: 0-65535 (0=off) -Default Value: 72 - This value delays the generation of receive interrupts in units of - 0.8192 microseconds. Receive interrupt reduction can improve CPU - efficiency if properly tuned for specific network traffic. Increasing - this value adds extra latency to frame reception and can end up - decreasing the throughput of TCP traffic. If the system is reporting - dropped receives, this value may be set too high, causing the driver to - run out of available receive descriptors. - -TxDescriptors -Valid Range: 64-4096 -Default Value: 256 - This value is the number of transmit descriptors allocated by the driver. - Increasing this value allows the driver to queue more transmits. Each - descriptor is 16 bytes. - -XsumRX -Valid Range: 0-1 -Default Value: 1 - A value of '1' indicates that the driver should enable IP checksum - offload for received packets (both UDP and TCP) to the adapter hardware. - - -Improving Performance -===================== - -With the 10 Gigabit server adapters, the default Linux configuration will -very likely limit the total available throughput artificially. There is a set -of configuration changes that, when applied together, will increase the ability -of Linux to transmit and receive data. The following enhancements were -originally acquired from settings published at http://www.spec.org/web99/ for -various submitted results using Linux. - -NOTE: These changes are only suggestions, and serve as a starting point for - tuning your network performance. - -The changes are made in three major ways, listed in order of greatest effect: -- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen - parameter. -- Use sysctl to modify /proc parameters (essentially kernel tuning) -- Use setpci to modify the MMRBC field in PCI-X configuration space to increase - transmit burst lengths on the bus. - -NOTE: setpci modifies the adapter's configuration registers to allow it to read -up to 4k bytes at a time (for transmits). However, for some systems the -behavior after modifying this register may be undefined (possibly errors of -some kind). A power-cycle, hard reset or explicitly setting the e6 register -back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a -stable configuration. - -- COPY these lines and paste them into ixgb_perf.sh: -#!/bin/bash -echo "configuring network performance , edit this file to change the interface -or device ID of 10GbE card" -# set mmrbc to 4k reads, modify only Intel 10GbE device IDs -# replace 1a48 with appropriate 10GbE device's ID installed on the system, -# if needed. -setpci -d 8086:1a48 e6.b=2e -# set the MTU (max transmission unit) - it requires your switch and clients -# to change as well. 
-# set the txqueuelen -# your ixgb adapter should be loaded as eth1 for this to work, change if needed -ip li set dev eth1 mtu 9000 txqueuelen 1000 up -# call the sysctl utility to modify /proc/sys entries -sysctl -p ./sysctl_ixgb.conf -- END ixgb_perf.sh - -- COPY these lines and paste them into sysctl_ixgb.conf: -# some of the defaults may be different for your kernel -# call this file with sysctl -p <this file> -# these are just suggested values that worked well to increase throughput in -# several network benchmark tests, your mileage may vary - -### IPV4 specific settings -# turn TCP timestamp support off, default 1, reduces CPU use -net.ipv4.tcp_timestamps = 0 -# turn SACK support off, default on -# on systems with a VERY fast bus -> memory interface this is the big gainer -net.ipv4.tcp_sack = 0 -# set min/default/max TCP read buffer, default 4096 87380 174760 -net.ipv4.tcp_rmem = 10000000 10000000 10000000 -# set min/pressure/max TCP write buffer, default 4096 16384 131072 -net.ipv4.tcp_wmem = 10000000 10000000 10000000 -# set min/pressure/max TCP buffer space, default 31744 32256 32768 -net.ipv4.tcp_mem = 10000000 10000000 10000000 - -### CORE settings (mostly for socket and UDP effect) -# set maximum receive socket buffer size, default 131071 -net.core.rmem_max = 524287 -# set maximum send socket buffer size, default 131071 -net.core.wmem_max = 524287 -# set default receive socket buffer size, default 65535 -net.core.rmem_default = 524287 -# set default send socket buffer size, default 65535 -net.core.wmem_default = 524287 -# set maximum amount of option memory buffers, default 10240 -net.core.optmem_max = 524287 -# set number of unprocessed input packets before kernel starts dropping them; default 300 -net.core.netdev_max_backlog = 300000 -- END sysctl_ixgb.conf - -Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface -your ixgb driver is using and/or replace '1a48' with appropriate 10GbE device's -ID installed on the system. - -NOTE: Unless these scripts are added to the boot process, these changes will - only last only until the next system reboot. - - -Resolving Slow UDP Traffic --------------------------- -If your server does not seem to be able to receive UDP traffic as fast as it -can receive TCP traffic, it could be because Linux, by default, does not set -the network stack buffers as large as they need to be to support high UDP -transfer rates. One way to alleviate this problem is to allow more memory to -be used by the IP stack to store incoming data. - -For instance, use the commands: - sysctl -w net.core.rmem_max=262143 -and - sysctl -w net.core.rmem_default=262143 -to increase the read buffer memory max and default to 262143 (256k - 1) from -defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables -will increase the amount of memory used by the network stack for receives, and -can be increased significantly more if necessary for your application. - - -Additional Configurations -========================= - - Configuring the Driver on Different Distributions - ------------------------------------------------- - Configuring a network driver to load properly when the system is started is - distribution dependent. Typically, the configuration process involves adding - an alias line to /etc/modprobe.conf as well as editing other system startup - scripts and/or configuration files. Many popular Linux distributions ship - with tools to make these changes for you. 
To learn the proper way to - configure a network device for your system, refer to your distribution - documentation. If during this process you are asked for the driver or module - name, the name for the Linux Base Driver for the Intel 10GbE Family of - Adapters is ixgb. - - Viewing Link Messages - --------------------- - Link messages will not be displayed to the console if the distribution is - restricting system messages. In order to see network driver link messages on - your console, set dmesg to eight by entering the following: - - dmesg -n 8 - - NOTE: This setting is not saved across reboots. - - - Jumbo Frames - ------------ - The driver supports Jumbo Frames for all adapters. Jumbo Frames support is - enabled by changing the MTU to a value larger than the default of 1500. - The maximum value for the MTU is 16114. Use the ip command to - increase the MTU size. For example: - - ip li set dev ethx mtu 9000 - - The maximum MTU setting for Jumbo Frames is 16114. This value coincides - with the maximum Jumbo Frames size of 16128. - - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The ethtool - version 1.6 or later is required for this functionality. - - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool/ - - NOTE: The ethtool version 1.6 only supports a limited set of ethtool options. - Support for a more complete ethtool feature set can be enabled by - upgrading to the latest version. - - - NAPI - ---- - - NAPI (Rx polling mode) is supported in the ixgb driver. NAPI is enabled - or disabled based on the configuration of the kernel. see CONFIG_IXGB_NAPI - - See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. - - -Known Issues/Troubleshooting -============================ - - NOTE: After installing the driver, if your Intel Network Connection is not - working, verify in the "In This Release" section of the readme that you have - installed the correct driver. - - Intel(R) PRO/10GbE CX4 Server Adapter Cable Interoperability Issue with - Fujitsu XENPAK Module in SmartBits Chassis - --------------------------------------------------------------------- - Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 - Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits - chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni. - The CRC errors may be received either by the Intel(R) PRO/10GbE CX4 - Server adapter or the SmartBits. If this situation occurs using a different - cable assembly may resolve the issue. - - CX4 Server Adapter Cable Interoperability Issues with HP Procurve 3400cl - Switch Port - ------------------------------------------------------------------------ - Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server - adapter is connected to an HP Procurve 3400cl switch port using short cables - (1 m or shorter). If this situation occurs, using a longer cable may resolve - the issue. - - Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that - Are 10 m or longer or where using a Leoni 15 m/24AWG cable assembly. The CRC - errors may be received either by the CX4 Server adapter or at the switch. If - this situation occurs, using a different cable assembly may resolve the issue. 
- - - Jumbo Frames System Requirement - ------------------------------- - Memory allocation failures have been observed on Linux systems with 64 MB - of RAM or less that are running Jumbo Frames. If you are using Jumbo - Frames, your system may require more than the advertised minimum - requirement of 64 MB of system memory. - - - Performance Degradation with Jumbo Frames - ----------------------------------------- - Degradation in throughput performance may be observed in some Jumbo frames - environments. If this is observed, increasing the application's socket buffer - size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. - See the specific application manual and /usr/src/linux*/Documentation/ - networking/ip-sysctl.txt for more details. - - - Allocating Rx Buffers when Using Jumbo Frames - --------------------------------------------- - Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if - the available memory is heavily fragmented. This issue may be seen with PCI-X - adapters or with packet split disabled. This can be reduced or eliminated - by changing the amount of available memory for receive buffer allocation, by - increasing /proc/sys/vm/min_free_kbytes. - - - Multiple Interfaces on Same Ethernet Broadcast Network - ------------------------------------------------------ - Due to the default ARP behavior on Linux, it is not possible to have - one system on two IP networks in the same Ethernet broadcast domain - (non-partitioned switch) behave as expected. All Ethernet interfaces - will respond to IP traffic for any IP address assigned to the system. - This results in unbalanced receive traffic. - - If you have multiple interfaces in a server, do either of the following: - - - Turn on ARP filtering by entering: - echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter - - - Install the interfaces in separate broadcast domains - either in - different switches or in a switch partitioned to VLANs. - - - UDP Stress Test Dropped Packet Issue - -------------------------------------- - Under small packets UDP stress test with 10GbE driver, the Linux system - may drop UDP packets due to the fullness of socket buffers. You may want - to change the driver's Flow Control variables to the minimum value for - controlling packet reception. - - - Tx Hangs Possible Under Stress - ------------------------------ - Under stress conditions, if TX hangs occur, turning off TSO - "ethtool -K eth0 tso off" may resolve the problem. - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/ixgbe.rst b/Documentation/networking/ixgbe.rst new file mode 100644 index 000000000000..725fc697fd8f --- /dev/null +++ b/Documentation/networking/ixgbe.rst @@ -0,0 +1,527 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters +============================================================================= + +Intel 10 Gigabit Linux driver. +Copyright(c) 1999-2018 Intel Corporation. 
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Additional Configurations
+- Known Issues
+- Support
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller 82598
+ * Intel(R) Ethernet Controller 82599
+ * Intel(R) Ethernet Controller X520
+ * Intel(R) Ethernet Controller X540
+ * Intel(R) Ethernet Controller X550
+ * Intel(R) Ethernet Controller X552
+ * Intel(R) Ethernet Controller X553
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+SFP+ Devices with Pluggable Optics
+----------------------------------
+
+82599-BASED ADAPTERS
+~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- If your 82599-based Intel(R) Network Adapter came with Intel optics or is an
+Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel optics
+and/or the direct attach cables listed below.
+- When 82599-based SFP+ devices are connected back to back, they should be set
+to the same Speed setting via ethtool. Results may vary if you mix speed
+settings.
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| SR Modules                                                                |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ SR (bailed)     | FTLX8571D3BCV-IT |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ SR (bailed)     | AFBR-703SDZ-IN2  |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ SR (bailed)     | AFBR-703SDDZ-IN1 |
++---------------+---------------------------------------+------------------+
+| LR Modules                                                                |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ LR (bailed)     | FTLX1471D3BCV-IT |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ LR (bailed)     | AFCT-701SDZ-IN2  |
++---------------+---------------------------------------+------------------+
+| Intel         | DUAL RATE 1G/10G SFP+ LR (bailed)     | AFCT-701SDDZ-IN1 |
++---------------+---------------------------------------+------------------+
+
+The following is a list of 3rd party SFP+ modules that have received some
+testing. Not all modules are applicable to all devices. 
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| Finisar       | SFP+ SR bailed, 10g single rate       | FTLX8571D3BCL    |
++---------------+---------------------------------------+------------------+
+| Avago         | SFP+ SR bailed, 10g single rate       | AFBR-700SDZ      |
++---------------+---------------------------------------+------------------+
+| Finisar       | SFP+ LR bailed, 10g single rate       | FTLX1471D3BCL    |
++---------------+---------------------------------------+------------------+
+| Finisar       | DUAL RATE 1G/10G SFP+ SR (No Bail)    | FTLX8571D3QCV-IT |
++---------------+---------------------------------------+------------------+
+| Avago         | DUAL RATE 1G/10G SFP+ SR (No Bail)    | AFBR-703SDZ-IN1  |
++---------------+---------------------------------------+------------------+
+| Finisar       | DUAL RATE 1G/10G SFP+ LR (No Bail)    | FTLX1471D3QCV-IT |
++---------------+---------------------------------------+------------------+
+| Avago         | DUAL RATE 1G/10G SFP+ LR (No Bail)    | AFCT-701SDZ-IN1  |
++---------------+---------------------------------------+------------------+
+| Finisar       | 1000BASE-T SFP                        | FCLF8522P2BTL    |
++---------------+---------------------------------------+------------------+
+| Avago         | 1000BASE-T                            | ABCU-5710RZ      |
++---------------+---------------------------------------+------------------+
+| HP            | 1000BASE-SX SFP                       | 453153-001       |
++---------------+---------------------------------------+------------------+
+
+82599-based adapters support all passive and active limiting direct attach
+cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
+
+Laser turns off for SFP+ when ifconfig ethX down
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"ifconfig ethX down" turns off the laser for 82599-based SFP+ fiber adapters.
+"ifconfig ethX up" turns on the laser.
+Alternatively, you can use "ip link set [down/up] dev ethX" to turn the
+laser off and on.
+
+
+82599-based QSFP+ Adapters
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- If your 82599-based Intel(R) Network Adapter came with Intel optics, it only
+supports Intel optics.
+- 82599-based QSFP+ adapters only support 4x10 Gbps connections. 1x40 Gbps
+connections are not supported. QSFP+ link partners must be configured for
+4x10 Gbps.
+- 82599-based QSFP+ adapters do not support automatic link speed detection.
+The link speed must be configured to either 10 Gbps or 1 Gbps to match the
+link partner's speed capabilities. Incorrect speed configurations will result
+in failure to link.
+- Intel(R) Ethernet Converged Network Adapter X520-Q1 only supports the optics
+and direct attach cables listed below.
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| Intel         | DUAL RATE 1G/10G QSFP+ SRL (bailed)   | E10GQSFPSR       |
++---------------+---------------------------------------+------------------+
+
+82599-based QSFP+ adapters support all passive and active limiting QSFP+
+direct attach cables that comply with SFF-8436 v4.1 specifications.
+
+82598-BASED ADAPTERS
+~~~~~~~~~~~~~~~~~~~~
+NOTES:
+- Intel(R) Ethernet Network Adapters that support removable optical modules
+only support their original module type (for example, the Intel(R) 10 Gigabit
+SR Dual Port Express Module only supports SR optical modules). If you plug in
+a different type of module, the driver will not load. 
+- Hot Swapping/hot plugging optical modules is not supported.
+- Only single speed, 10 gigabit modules are supported.
+- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module
+types are not supported. Please see your system documentation for details.
+
+The following is a list of SFP+ modules and direct attach cables that have
+received some testing. Not all modules are applicable to all devices.
+
++---------------+---------------------------------------+------------------+
+| Supplier      | Type                                  | Part Numbers     |
++===============+=======================================+==================+
+| Finisar       | SFP+ SR bailed, 10g single rate       | FTLX8571D3BCL    |
++---------------+---------------------------------------+------------------+
+| Avago         | SFP+ SR bailed, 10g single rate       | AFBR-700SDZ      |
++---------------+---------------------------------------+------------------+
+| Finisar       | SFP+ LR bailed, 10g single rate       | FTLX1471D3BCL    |
++---------------+---------------------------------------+------------------+
+
+82598-based adapters support all passive direct attach cables that comply with
+SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach cables
+are not supported.
+
+Third party optic modules and cables referred to above are listed only for the
+purpose of highlighting third party specifications and potential
+compatibility, and are not recommendations or endorsements or sponsorship of
+any third party's product by Intel. Intel is not endorsing or promoting
+products made by any third party and the third party reference is provided
+only to share information regarding certain optic modules and cables with the
+above specifications. There may be other manufacturers or suppliers, producing
+or supplying optic modules and cables with similar or matching descriptions.
+Customers must use their own discretion and diligence to purchase optic
+modules and cables from any third party of their choice. Customers are solely
+responsible for assessing the suitability of the product and/or devices and
+for the selection of the vendor for purchasing any product. THE OPTIC MODULES
+AND CABLES REFERRED TO ABOVE ARE NOT WARRANTED OR SUPPORTED BY INTEL. INTEL
+ASSUMES NO LIABILITY WHATSOEVER, AND INTEL DISCLAIMS ANY EXPRESS OR IMPLIED
+WARRANTY, RELATING TO SALE AND/OR USE OF SUCH THIRD PARTY PRODUCTS OR
+SELECTION OF VENDOR BY CUSTOMERS.
+
+Command Line Parameters
+=======================
+
+max_vfs
+-------
+:Valid Range: 1-63
+
+This parameter adds support for SR-IOV. It causes the driver to spawn up to
+max_vfs worth of virtual functions.
+If the value is greater than 0, it will also force the VMDq parameter to be 1
+or more.
+
+NOTE: This parameter is only used on kernel 3.7.x and below. On kernel 3.8.x
+and above, use sysfs to enable VFs. Also, for Red Hat distributions, this
+parameter is only used on version 6.6 and older. For version 6.7 and newer, use
+sysfs. For example::
+
+  echo $num_vf_enabled > /sys/class/net/$dev/device/sriov_numvfs  # enable VFs
+  echo 0 > /sys/class/net/$dev/device/sriov_numvfs                # disable VFs
+
+The parameters for the driver are referenced by position. Thus, if you have a
+dual port adapter, or more than one adapter in your system, and want N virtual
+functions per port, you must specify a number for each port with each parameter
+separated by a comma. For example::
+
+  modprobe ixgbe max_vfs=4
+
+This will spawn 4 VFs on the first port. 
+
+::
+
+  modprobe ixgbe max_vfs=2,4
+
+This will spawn 2 VFs on the first port and 4 VFs on the second port.
+
+NOTE: Caution must be used in loading the driver with these parameters.
+Depending on your system configuration, number of slots, etc., it is impossible
+to predict in all cases where the positions would be on the command line.
+
+NOTE: Neither the device nor the driver control how VFs are mapped into config
+space. Bus layout will vary by operating system. On operating systems that
+support it, you can check sysfs to find the mapping.
+
+NOTE: When either SR-IOV mode or VMDq mode is enabled, hardware VLAN filtering
+and VLAN tag stripping/insertion will remain enabled. Please remove the old
+VLAN filter before the new VLAN filter is added. For example::
+
+  ip link set eth0 vf 0 vlan 100  # set VLAN 100 for VF 0
+  ip link set eth0 vf 0 vlan 0    # delete VLAN 100
+  ip link set eth0 vf 0 vlan 200  # set a new VLAN 200 for VF 0
+
+With kernel 3.6, the driver supports the simultaneous usage of max_vfs and DCB
+features, subject to the constraints described below. Prior to kernel 3.6, the
+driver did not support the simultaneous operation of max_vfs greater than 0 and
+the DCB features (multiple traffic classes utilizing Priority Flow Control and
+Extended Transmission Selection).
+
+When DCB is enabled, network traffic is transmitted and received through
+multiple traffic classes (packet buffers in the NIC). The traffic is associated
+with a specific class based on priority, which has a value of 0 through 7 used
+in the VLAN tag. When SR-IOV is not enabled, each traffic class is associated
+with a set of receive/transmit descriptor queue pairs. The number of queue
+pairs for a given traffic class depends on the hardware configuration. When
+SR-IOV is enabled, the descriptor queue pairs are grouped into pools. The
+Physical Function (PF) and each Virtual Function (VF) are allocated a pool of
+receive/transmit descriptor queue pairs. When multiple traffic classes are
+configured (for example, DCB is enabled), each pool contains a queue pair from
+each traffic class. When a single traffic class is configured in the hardware,
+the pools contain multiple queue pairs from the single traffic class.
+
+The number of VFs that can be allocated depends on the number of traffic
+classes that can be enabled. The configurable number of traffic classes for
+each enabled VF is as follows:
+
+- 0 - 15 VFs = Up to 8 traffic classes, depending on device support
+- 16 - 31 VFs = Up to 4 traffic classes
+- 32 - 63 VFs = 1 traffic class
+
+When VFs are configured, the PF is allocated one pool as well. The PF supports
+the DCB features with the constraint that each traffic class will only use a
+single queue pair. When zero VFs are configured, the PF can support multiple
+queue pairs per traffic class.
+
+allow_unsupported_sfp
+---------------------
+:Valid Range: 0,1
+:Default Value: 0 (disabled)
+
+This parameter allows unsupported and untested SFP+ modules on 82599-based
+adapters, as long as the type of module is known to the driver.
+
+debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system
+logs.
+
+
+Additional Features and Configurations
+======================================
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for ixgbe. 
When transmit is enabled, +pause frames are generated when the receive packet buffer crosses a predefined +threshold. When receive is enabled, the transmit unit will halt for the time +delay specified when a pause frame is received. + +NOTE: You must have a flow control capable link partner. + +Flow Control is enabled by default. + +Use ethtool to change the flow control settings. To enable or disable Rx or +Tx Flow Control:: + + ethtool -A eth? rx <on|off> tx <on|off> + +Note: This command only enables or disables Flow Control if auto-negotiation is +disabled. If auto-negotiation is enabled, this command changes the parameters +used for auto-negotiation with the link partner. + +To enable or disable auto-negotiation:: + + ethtool -s eth? autoneg <on|off> + +Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending +on your device, you may not be able to change the auto-negotiation setting. + +NOTE: For 82598 backplane cards entering 1 gigabit mode, flow control default +behavior is changed to off. Flow control in 1 gigabit mode on these devices can +lead to transmit hangs. + +Intel(R) Ethernet Flow Director +------------------------------- +The Intel Ethernet Flow Director performs the following tasks: + +- Directs receive packets according to their flows to different queues. +- Enables tight control on routing a flow in the platform. +- Matches flows and CPU cores for flow affinity. +- Supports multiple parameters for flexible flow classification and load + balancing (in SFP mode only). + +NOTE: Intel Ethernet Flow Director masking works in the opposite manner from +subnet masking. In the following command:: + + #ethtool -N eth11 flow-type ip4 src-ip 172.4.1.2 m 255.0.0.0 dst-ip \ + 172.21.1.1 m 255.128.0.0 action 31 + +The src-ip value that is written to the filter will be 0.4.1.2, not 172.0.0.0 +as might be expected. Similarly, the dst-ip value written to the filter will be +0.21.1.1, not 172.0.0.0. + +To enable or disable the Intel Ethernet Flow Director:: + + # ethtool -K ethX ntuple <on|off> + +When disabling ntuple filters, all the user programmed filters are flushed from +the driver cache and hardware. All needed filters must be re-added when ntuple +is re-enabled. + +To add a filter that directs packet to queue 2, use -U or -N switch:: + + # ethtool -N ethX flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1] + +To see the list of filters currently present:: + + # ethtool <-u|-n> ethX + +Sideband Perfect Filters +------------------------ +Sideband Perfect Filters are used to direct traffic that matches specified +characteristics. They are enabled through ethtool's ntuple interface. To add a +new filter use the following command:: + + ethtool -U <device> flow-type <type> src-ip <ip> dst-ip <ip> src-port <port> \ + dst-port <port> action <queue> + +Where: + <device> - the ethernet device to program + <type> - can be ip4, tcp4, udp4, or sctp4 + <ip> - the IP address to match on + <port> - the port number to match on + <queue> - the queue to direct traffic towards (-1 discards the matched traffic) + +Use the following command to delete a filter:: + + ethtool -U <device> delete <N> + +Where <N> is the filter id displayed when printing all the active filters, and +may also have been specified using "loc <N>" when adding the filter. 
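+
+For instance, assuming a filter was previously added with "loc 1" on an
+interface named enp130s0 (both values are examples only), it can be removed
+with::
+
+  ethtool -U enp130s0 delete 1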
+
+The following example matches TCP traffic sent from 192.168.0.1, port 5300,
+directed to 192.168.0.5, port 80, and sends it to queue 7::
+
+  ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 \
+  src-port 5300 dst-port 80 action 7
+
+For each flow-type, the programmed filters must all have the same matching
+input set. For example, issuing the following two commands is acceptable::
+
+  ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+  ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10
+
+Issuing the next two commands, however, is not acceptable, since the first
+specifies src-ip and the second specifies dst-ip::
+
+  ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7
+  ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10
+
+The second command will fail with an error. You may program multiple filters
+with the same fields, using different values, but, on one device, you may not
+program two TCP4 filters with different matching fields.
+
+Matching on a sub-portion of a field is not supported by the ixgbe driver, thus
+partial mask fields are not supported.
+
+To create filters that direct traffic to a specific Virtual Function, use the
+"user-def" parameter. Specify the user-def as a 64-bit value, where the lower
+32 bits represent the queue number, while the next 8 bits represent which VF.
+Note that 0 is the PF, so the VF identifier is offset by 1. For example::
+
+  ... user-def 0x800000002 ...
+
+specifies to direct traffic to Virtual Function 7 (8 minus 1) into queue 2 of
+that VF.
+
+Note that these filters will not break internal routing rules, and will not
+route traffic that otherwise would not have been sent to the specified Virtual
+Function.
+
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU)
+to a value larger than the default value of 1500.
+
+Use the ifconfig command to increase the MTU size. For example, enter the
+following where <x> is the interface number::
+
+  ifconfig eth<x> mtu 9000 up
+
+Alternatively, you can use the ip command as follows::
+
+  ip link set mtu 9000 dev eth<x>
+  ip link set up dev eth<x>
+
+This setting is not saved across reboots. The setting change can be made
+permanent by adding 'MTU=9000' to the file::
+
+  /etc/sysconfig/network-scripts/ifcfg-eth<x>   (for RHEL)
+  /etc/sysconfig/network/<config_file>          (for SLES)
+
+NOTE: The maximum MTU setting for Jumbo Frames is 9710. This value coincides
+with the maximum Jumbo Frames size of 9728 bytes.
+
+NOTE: This driver will attempt to use multiple page-sized buffers to receive
+each jumbo packet. This should help to avoid buffer starvation issues when
+allocating receive packets.
+
+NOTE: For 82599-based network connections, if you are enabling jumbo frames in
+a virtual function (VF), jumbo frames must first be enabled in the physical
+function (PF). The VF MTU setting cannot be larger than the PF MTU.
+
+Generic Receive Offload, aka GRO
+--------------------------------
+The driver supports the in-kernel software implementation of GRO. GRO has
+shown that by coalescing Rx traffic into larger chunks of data, CPU
+utilization can be significantly reduced when under large Rx load. GRO is an
+evolution of the previously-used LRO interface. GRO is able to coalesce
+other protocols besides TCP. It's also safe to use with configurations that
+are problematic for LRO, namely bridging and iSCSI. 
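+
+GRO is a kernel feature rather than a driver parameter; as a rough sketch, it
+can be inspected and toggled at runtime with ethtool (the interface name ethX
+is a placeholder)::
+
+  ethtool -k ethX | grep generic-receive-offload
+  ethtool -K ethX gro on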
+
+Data Center Bridging (DCB)
+--------------------------
+NOTE:
+The kernel assumes that TC0 is available, and will disable Priority Flow
+Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is
+enabled when setting up DCB on your switch.
+
+DCB is a configuration Quality of Service implementation in hardware. It uses
+the VLAN priority tag (802.1p) to filter traffic. That means that there are 8
+different priorities that traffic can be filtered into. It also enables
+priority flow control (802.1Qbb) which can limit or eliminate the number of
+dropped packets during network stress. Bandwidth can be allocated to each of
+these priorities, which is enforced at the hardware level (802.1Qaz).
+
+Adapter firmware implements LLDP and DCBX protocol agents as per 802.1AB and
+802.1Qaz respectively. The firmware-based DCBX agent runs in willing mode only
+and can accept settings from a DCBX capable peer. Software configuration of
+DCBX parameters via dcbtool/lldptool is not supported.
+
+The ixgbe driver implements the DCB netlink interface layer to allow user-space
+to communicate with the driver and query DCB configuration for the port.
+
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information. The latest ethtool
+version is required for this functionality. Download it at:
+https://www.kernel.org/pub/software/network/ethtool/
+
+FCoE
+----
+The ixgbe driver supports Fiber Channel over Ethernet (FCoE) and Data Center
+Bridging (DCB). This code has no default effect on the regular driver
+operation. Configuring DCB and FCoE is outside the scope of this README. Refer
+to http://www.open-fcoe.org/ for FCoE project information and contact
+ixgbe-eedc@lists.sourceforge.net for DCB information.
+
+MAC and VLAN anti-spoofing feature
+----------------------------------
+When a malicious driver attempts to send a spoofed packet, it is dropped by the
+hardware and not transmitted.
+
+An interrupt is sent to the PF driver notifying it of the spoof attempt. When a
+spoofed packet is detected, the PF driver will send the following message to
+the system log (displayed by the "dmesg" command)::
+
+  ixgbe ethX: ixgbe_spoof_check: n spoofed packets detected
+
+where "X" is the PF interface number and "n" is the number of spoofed packets.
+
+NOTE: This feature can be disabled for a specific Virtual Function (VF)::
+
+  ip link set <pf dev> vf <vf id> spoofchk {off|on}
+
+
+Known Issues/Troubleshooting
+============================
+
+Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS
+-----------------------------------------------------------------------
+Linux KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM.
+This includes traditional PCIe devices, as well as SR-IOV-capable devices based
+on the Intel Ethernet Controller XL710.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net. 
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt deleted file mode 100644 index 687835415707..000000000000 --- a/Documentation/networking/ixgbe.txt +++ /dev/null @@ -1,349 +0,0 @@ -Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Family of -Adapters -============================================================================= - -Intel 10 Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Additional Configurations -- Performance Tuning -- Known Issues -- Support - -Identifying Your Adapter -======================== - -The driver in this release is compatible with 82598, 82599 and X540-based -Intel Network Connections. - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/sb/CS-012904.htm - -SFP+ Devices with Pluggable Optics ----------------------------------- - -82599-BASED ADAPTERS - -NOTES: If your 82599-based Intel(R) Network Adapter came with Intel optics, or -is an Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel -optics and/or the direct attach cables listed below. - -When 82599-based SFP+ devices are connected back to back, they should be set to -the same Speed setting via ethtool. Results may vary if you mix speed settings. -82598-based adapters support all passive direct attach cables that comply -with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach -cables are not supported. - -Supplier Type Part Numbers - -SR Modules -Intel DUAL RATE 1G/10G SFP+ SR (bailed) FTLX8571D3BCV-IT -Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDDZ-IN1 -Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDZ-IN2 -LR Modules -Intel DUAL RATE 1G/10G SFP+ LR (bailed) FTLX1471D3BCV-IT -Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDDZ-IN1 -Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDZ-IN2 - -The following is a list of 3rd party SFP+ modules and direct attach cables that -have received some testing. Not all modules are applicable to all devices. - -Supplier Type Part Numbers - -Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL -Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ -Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL - -Finisar DUAL RATE 1G/10G SFP+ SR (No Bail) FTLX8571D3QCV-IT -Avago DUAL RATE 1G/10G SFP+ SR (No Bail) AFBR-703SDZ-IN1 -Finisar DUAL RATE 1G/10G SFP+ LR (No Bail) FTLX1471D3QCV-IT -Avago DUAL RATE 1G/10G SFP+ LR (No Bail) AFCT-701SDZ-IN1 -Finistar 1000BASE-T SFP FCLF8522P2BTL -Avago 1000BASE-T SFP ABCU-5710RZ - -82599-based adapters support all passive and active limiting direct attach -cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. - -Laser turns off for SFP+ when device is down -------------------------------------------- -"ip link set down" turns off the laser for 82599-based SFP+ fiber adapters. -"ip link set up" turns on the laser. - - -82598-BASED ADAPTERS - -NOTES for 82598-Based Adapters: -- Intel(R) Network Adapters that support removable optical modules only support - their original module type (i.e., the Intel(R) 10 Gigabit SR Dual Port - Express Module only supports SR optical modules). If you plug in a different - type of module, the driver will not load. -- Hot Swapping/hot plugging optical modules is not supported. -- Only single speed, 10 gigabit modules are supported. -- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module - types are not supported. 
-  types are not supported. Please see your system documentation for details.
-
-The following is a list of 3rd party SFP+ modules and direct attach cables that
-have received some testing. Not all modules are applicable to all devices.
-
-Supplier    Type                                         Part Numbers
-
-Finisar     SFP+ SR bailed, 10g single rate              FTLX8571D3BCL
-Avago       SFP+ SR bailed, 10g single rate              AFBR-700SDZ
-Finisar     SFP+ LR bailed, 10g single rate              FTLX1471D3BCL
-
-82598-based adapters support all passive direct attach cables that comply
-with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach
-cables are not supported.
-
-
-Flow Control
-------------
-Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
-receiving and transmitting pause frames for ixgbe. When TX is enabled, PAUSE
-frames are generated when the receive packet buffer crosses a predefined
-threshold. When RX is enabled, the transmit unit will halt for the time delay
-specified when a PAUSE frame is received.
-
-Flow Control is enabled by default. If you want to disable a flow control
-capable link partner, use ethtool:
-
-     ethtool -A ethX autoneg off rx off tx off
-
-NOTE: For 82598 backplane cards entering 1 gig mode, flow control default
-behavior is changed to off. Flow control in 1 gig mode on these devices can
-lead to Tx hangs.
-
-Intel(R) Ethernet Flow Director
--------------------------------
-Supports advanced filters that direct receive packets by their flows to
-different queues. Enables tight control on routing a flow in the platform.
-Matches flows and CPU cores for flow affinity. Supports multiple parameters
-for flexible flow classification and load balancing.
-
-Flow Director is enabled only if the kernel supports multiple TX queues.
-
-An included script (set_irq_affinity.sh) automates setting the IRQ to CPU
-affinity.
-
-You can verify that the driver is using Flow Director by looking at the
-counters in ethtool: fdir_miss and fdir_match.
-
-Other ethtool Commands:
-To enable Flow Director
-	ethtool -K ethX ntuple on
-To add a filter
-	Use the -U switch, e.g., ethtool -U ethX flow-type tcp4 src-ip 10.0.128.23
-	action 1
-To see the list of filters currently present:
-	ethtool -u ethX
-
-Perfect Filter: Perfect filter is an interface to load the filter table that
-funnels all flows into queue_0 unless an alternative queue is specified using
-"action". In that case, any flow that matches the filter criteria will be
-directed to the appropriate queue.
-
-If the queue is defined as -1, the filter will drop matching packets.
-
-To account for filter matches and misses, there are two stats in ethtool:
-fdir_match and fdir_miss. In addition, rx_queue_N_packets shows the number of
-packets processed by the Nth queue.
-
-NOTE: Receive Packet Steering (RPS) and Receive Flow Steering (RFS) are not
-compatible with Flow Director. If Flow Director is enabled, they will be
-disabled.
-
-The following three parameters impact Flow Director.
-
-FdirMode
---------
-Valid Range: 0-2 (0=off, 1=ATR, 2=Perfect filter mode)
-Default Value: 1
-
-  Flow Director filtering modes.
-
-FdirPballoc
------------
-Valid Range: 0-2 (0=64k, 1=128k, 2=256k)
-Default Value: 0
-
-  Flow Director allocated packet buffer size.
-
-AtrSampleRate
--------------
-Valid Range: 1-100
-Default Value: 20
-
-  Software ATR Tx packet sample rate. For example, when set to 20, every 20th
-  packet is sampled to see whether it will create a new flow.
-
-Node
-----
-Valid Range: 0-n
-Default Value: -1 (off)
-
-  0 - n: where n is the number of NUMA nodes (i.e. 0 - 3) currently online in
-  your system
-  -1: turns this option off
-
-  The Node parameter allows you to pick which NUMA node you want the adapter
-  to allocate memory on.
-
-max_vfs
--------
-Valid Range: 1-63
-Default Value: 0 (SR-IOV disabled)
-
-  If the value is greater than 0 it will also force the VMDq parameter to be 1
-  or more.
-
-  This parameter adds support for SR-IOV. It causes the driver to spawn up to
-  max_vfs virtual functions.
-
-
-Additional Configurations
-=========================
-
-  Jumbo Frames
-  ------------
-  The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
-  enabled by changing the MTU to a value larger than the default of 1500.
-  The maximum MTU setting for Jumbo Frames is 9710; this coincides with the
-  maximum Jumbo Frames wire size of 9728. Use the ip command to increase the
-  MTU size. For example:
-
-    ip link set dev ethX mtu 9000
-
-  Generic Receive Offload, aka GRO
-  --------------------------------
-  The driver supports the in-kernel software implementation of GRO. By
-  coalescing Rx traffic into larger chunks of data, GRO can significantly
-  reduce CPU utilization under heavy Rx load. GRO is an evolution of the
-  previously-used LRO interface. GRO is able to coalesce other protocols
-  besides TCP. It is also safe to use with configurations that are
-  problematic for LRO, namely bridging and iSCSI.
-
-  Data Center Bridging, aka DCB
-  -----------------------------
-  DCB is a Quality of Service (QoS) implementation in hardware. It uses the
-  VLAN priority tag (802.1p) to filter traffic, which means traffic can be
-  filtered into 8 different priorities. It also enables priority flow
-  control, which can limit or eliminate the number of dropped packets during
-  network stress. Bandwidth can be allocated to each of these priorities,
-  which is enforced at the hardware level.
-
-  To enable DCB support in ixgbe, you must enable the DCB netlink layer to
-  allow the userspace tools (see below) to communicate with the driver.
-  This can be found in the kernel configuration here:
-
-        -> Networking support
-              -> Networking options
-                    -> Data Center Bridging support
-
-  Once this is selected, DCB support must be selected for ixgbe. This can
-  be found here:
-
-        -> Device Drivers
-              -> Network device support (NETDEVICES [=y])
-                    -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
-                          -> Intel(R) 10GbE PCI Express adapters support
-                                -> Data Center Bridging (DCB) Support
-
-  After these options are selected, you must rebuild your kernel and your
-  modules.
-
-  In order to use DCB, userspace tools must be downloaded and installed.
-  The dcbd tools can be found at:
-
-        http://e1000.sf.net
-
-  Ethtool
-  -------
-  The driver utilizes the ethtool interface for driver configuration and
-  diagnostics, as well as displaying statistical information. The latest
-  ethtool version is required for this functionality.
-
-  The latest release of ethtool can be found from
-  https://www.kernel.org/pub/software/network/ethtool/
-
-  FCoE
-  ----
-  This release of the ixgbe driver contains new code to enable users to use
-  Fibre Channel over Ethernet (FCoE) and Data Center Bridging (DCB)
-  functionality that is supported by the 82598-based hardware. This code has
-  no default effect on the regular driver operation, and configuring DCB and
-  FCoE is outside the scope of this driver README.
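As a quick sanity check for the Flow Director and ethtool material above, a
minimal sketch (the interface name ethX is a placeholder, as in the examples
above):

    ethtool -i ethX               # driver name, version and firmware in use
    ethtool -S ethX | grep fdir   # the fdir_match/fdir_miss counters noted earlier

Both are standard ethtool queries; they complement the DCB and FCoE pointers
that follow.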
-  Refer to http://www.open-fcoe.org/ for FCoE project information and contact
-  e1000-eedc@lists.sourceforge.net for DCB information.
-
-  MAC and VLAN anti-spoofing feature
-  ----------------------------------
-  When a malicious driver attempts to send a spoofed packet, it is dropped by
-  the hardware and not transmitted. An interrupt is sent to the PF driver
-  notifying it of the spoof attempt.
-
-  When a spoofed packet is detected, the PF driver will send the following
-  message to the system log (displayed by the "dmesg" command):
-
-    Spoof event(s) detected on VF (n)
-
-  where n is the VF that attempted the spoofing.
-
-
-Performance Tuning
-==================
-
-An excellent article on performance tuning can be found at:
-
-http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
-
-
-Known Issues
-============
-
-  Enabling SR-IOV in a 32-bit or 64-bit Microsoft* Windows* Server 2008/R2
-  Guest OS using Intel(R) 82576-based GbE or Intel(R) 82599-based 10GbE
-  controller under KVM
-  ------------------------------------------------------------------------
-  KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
-  includes traditional PCIe devices, as well as SR-IOV-capable devices using
-  Intel 82576-based and 82599-based controllers.
-
-  While direct assignment of a PCIe device or an SR-IOV Virtual Function (VF)
-  to a Linux-based VM running a 2.6.32 or later kernel works fine, there is a
-  known issue with a Microsoft Windows Server 2008 VM that results in a
-  "yellow bang" error. The problem is not within the Intel driver or the
-  SR-IOV logic of the VMM, but within KVM itself: KVM emulates an older CPU
-  model for the guests, and that older CPU model does not support MSI-X
-  interrupts, which is a requirement for Intel SR-IOV.
-
-  If you wish to use the Intel 82576 or 82599-based controllers in SR-IOV mode
-  with KVM and a Microsoft Windows Server 2008 guest, try the following
-  workaround. The workaround is to tell KVM to emulate a different model of
-  CPU when using qemu to create the KVM guest:
-
-    "-cpu qemu64,model=13"
-
-
-Support
-=======
-
-For general information, go to the Intel support website at:
-
-    http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-    http://e1000.sourceforge.net
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgbevf.rst b/Documentation/networking/ixgbevf.rst
new file mode 100644
index 000000000000..56cde6366c2f
--- /dev/null
+++ b/Documentation/networking/ixgbevf.rst
@@ -0,0 +1,66 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet
+=============================================================
+
+Intel 10 Gigabit Virtual Function Linux driver.
+Copyright(c) 1999-2018 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Known Issues
+- Support
+
+This driver supports 82599, X540, X550, and X552-based virtual function devices
+that can only be activated on kernels that support SR-IOV.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel adapter. All hardware requirements listed apply to use
+with Linux.
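As a minimal sketch of what "activated on kernels that support SR-IOV" means
in practice (not part of the original document; the PF interface name
enp1s0f0 and the VF count of 4 are assumptions), virtual functions are
typically created through the physical function's sysfs attribute, after
which this VF driver binds to the new devices:

    # create four VFs on the PF; ixgbevf attaches to each new VF device
    echo 4 > /sys/class/net/enp1s0f0/device/sriov_numvfs

Writing 0 to the same attribute removes the VFs again.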
+
+Identifying Your Adapter
+========================
+The driver is compatible with devices based on the following:
+
+ * Intel(R) Ethernet Controller 82598
+ * Intel(R) Ethernet Controller 82599
+ * Intel(R) Ethernet Controller X520
+ * Intel(R) Ethernet Controller X540
+ * Intel(R) Ethernet Controller X550
+ * Intel(R) Ethernet Controller X552
+ * Intel(R) Ethernet Controller X553
+
+For information on how to identify your adapter, and for the latest Intel
+network drivers, refer to the Intel Support website:
+https://www.intel.com/support
+
+Known Issues/Troubleshooting
+============================
+
+SR-IOV requires the correct platform and OS support.
+
+The guest OS loading this driver must support MSI-X interrupts.
+
+This driver is only supported as a loadable module at this time. Intel is not
+supplying patches against the kernel source to allow for static linking of the
+drivers.
+
+VLANs: There is a limit of a total of 64 shared VLANs to 1 or more VFs.
+
+
+Support
+=======
+For general information, go to the Intel support website at:
+
+https://www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+https://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the issue
+to e1000-devel@lists.sf.net.
diff --git a/Documentation/networking/ixgbevf.txt b/Documentation/networking/ixgbevf.txt
deleted file mode 100644
index 53d8d2a5a6a3..000000000000
--- a/Documentation/networking/ixgbevf.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
-
-Intel 10 Gigabit Virtual Function Linux driver.
-Copyright(c) 1999 - 2013 Intel Corporation.
-
-Contents
-========
-
-- Identifying Your Adapter
-- Known Issues/Troubleshooting
-- Support
-
-This file describes the ixgbevf Linux* Base Driver for Intel Network
-Connection.
-
-The ixgbevf driver supports 82599-based virtual function devices that can only
-be activated on kernels with CONFIG_PCI_IOV enabled.
-
-The ixgbevf driver supports virtual functions generated by the ixgbe driver
-with a max_vfs value of 1 or greater.
-
-The guest OS loading the ixgbevf driver must support MSI-X interrupts.
-
-VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
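A minimal sketch of the max_vfs flow described above (not from the original
document; the VF count of 2 is an arbitrary example, and on newer kernels the
sriov_numvfs sysfs interface shown earlier is generally preferred over this
module parameter):

    # ask the PF driver to create VFs at load time, then load the VF driver
    modprobe ixgbe max_vfs=2
    modprobe ixgbevf

In practice ixgbevf is usually loaded automatically once the VF PCI devices
appear; the explicit modprobe is only illustrative.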
- -Identifying Your Adapter -======================== - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -Known Issues/Troubleshooting -============================ - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0 deleted file mode 100644 index f9158e831e79..000000000000 --- a/LICENSES/other/CC-BY-SA-4.0 +++ /dev/null @@ -1,397 +0,0 @@ -Valid-License-Identifier: CC-BY-SA-4.0 -SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0 -Usage-Guide: - To use the Creative Commons Attribution Share Alike 4.0 International - license put the following SPDX tag/value pair into a comment according to - the placement guidelines in the licensing rules documentation: - SPDX-License-Identifier: CC-BY-SA-4.0 -License-Text: - -Creative Commons Attribution-ShareAlike 4.0 International - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of Creative -Commons public licenses does not create a lawyer-client or other -relationship. Creative Commons makes its licenses and related information -available on an "as-is" basis. Creative Commons gives no warranties -regarding its licenses, any material licensed under their terms and -conditions, or any related information. Creative Commons disclaims all -liability for damages resulting from their use to the fullest extent -possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share original -works of authorship and other material subject to copyright and certain -other rights specified in the public license below. The following -considerations are for informational purposes only, are not exhaustive, and -do not form part of our licenses. - -Considerations for licensors: Our public licenses are intended for use by -those authorized to give the public permission to use material in ways -otherwise restricted by copyright and certain other rights. Our licenses -are irrevocable. Licensors should read and understand the terms and -conditions of the license they choose before applying it. Licensors should -also secure all rights necessary before applying our licenses so that the -public can reuse the material as expected. Licensors should clearly mark -any material not subject to the license. This includes other CC-licensed -material, or material used under an exception or limitation to -copyright. More considerations for licensors : -wiki.creativecommons.org/Considerations_for_licensors - -Considerations for the public: By using one of our public licenses, a -licensor grants the public permission to use the licensed material under -specified terms and conditions. If the licensor's permission is not -necessary for any reason - for example, because of any applicable exception -or limitation to copyright - then that use is not regulated by the -license. 
Our licenses grant only permissions under copyright and certain -other rights that a licensor has authority to grant. Use of the licensed -material may still be restricted for other reasons, including because -others have copyright or other rights in the material. A licensor may make -special requests, such as asking that all changes be marked or described. - -Although not required by our licenses, you are encouraged to respect those -requests where reasonable. More considerations for the public : -wiki.creativecommons.org/Considerations_for_licensees - -Creative Commons Attribution-ShareAlike 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree to -be bound by the terms and conditions of this Creative Commons -Attribution-ShareAlike 4.0 International Public License ("Public -License"). To the extent this Public License may be interpreted as a -contract, You are granted the Licensed Rights in consideration of Your -acceptance of these terms and conditions, and the Licensor grants You such -rights in consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - -Section 1 - Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material and - in which the Licensed Material is translated, altered, arranged, - transformed, or otherwise modified in a manner requiring permission - under the Copyright and Similar Rights held by the Licensor. For - purposes of this Public License, where the Licensed Material is a - musical work, performance, or sound recording, Adapted Material is - always produced where the Licensed Material is synched in timed - relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright and - Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative Commons - as essentially the equivalent of this Public License. - - d. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright Treaty - adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or any - other exception or limitation to Copyright and Similar Rights that - applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name of - a Creative Commons Public License. The License Elements of this - Public License are Attribution and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, or - other material to which the Licensor applied this Public License. - - i. 
Licensed Rights means the rights granted to You subject to the terms - and conditions of this Public License, which are limited to all - Copyright and Similar Rights that apply to Your use of the Licensed - Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such as - reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the public - may access the material from a place and at a time individually - chosen by them. - - l. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially equivalent - rights anywhere in the world. m. You means the individual or entity - exercising the Licensed Rights under this Public License. Your has a - corresponding meaning. - -Section 2 - Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, the - Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - A. reproduce and Share the Licensed Material, in whole or in part; and - - B. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with its - terms and conditions. - - 3. Term. The term of this Public License is specified in Section 6(a). - - 4. Media and formats; technical modifications allowed. The Licensor - authorizes You to exercise the Licensed Rights in all media and - formats whether now known or hereafter created, and to make - technical modifications necessary to do so. The Licensor waives - and/or agrees not to assert any right or authority to forbid You - from making technical modifications necessary to exercise the - Licensed Rights, including technical modifications necessary to - circumvent Effective Technological Measures. For purposes of - this Public License, simply making modifications authorized by - this Section 2(a)(4) never produces Adapted Material. - - 5. Downstream recipients. - - A. Offer from the Licensor - Licensed Material. Every recipient - of the Licensed Material automatically receives an offer - from the Licensor to exercise the Licensed Rights under the - terms and conditions of this Public License. - - B. Additional offer from the Licensor - Adapted Material. Every - recipient of Adapted Material from You automatically - receives an offer from the Licensor to exercise the Licensed - Rights in the Adapted Material under the conditions of the - Adapter's License You apply. - - C. No downstream restrictions. You may not offer or impose any - additional or different terms or conditions on, or apply any - Effective Technological Measures to, the Licensed Material - if doing so restricts exercise of the Licensed Rights by any - recipient of the Licensed Material. - - 6. No endorsement. 
Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You are, - or that Your use of the Licensed Material is, connected with, or - sponsored, endorsed, or granted official status by, the Licensor - or others designated to receive attribution as provided in - Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not licensed - under this Public License, nor are publicity, privacy, and/or - other similar personality rights; however, to the extent - possible, the Licensor waives and/or agrees not to assert any - such rights held by the Licensor to the limited extent necessary - to allow You to exercise the Licensed Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this Public - License. - - 3. To the extent possible, the Licensor waives any right to collect - royalties from You for the exercise of the Licensed Rights, - whether directly or through a collecting society under any - voluntary or waivable statutory or compulsory licensing - scheme. In all other cases the Licensor expressly reserves any - right to collect such royalties. - -Section 3 - License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified form), - You must: - - A. retain the following if it is supplied by the Licensor with - the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by the - Licensor (including by pseudonym if designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of warranties; - - v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; - - B. indicate if You modified the Licensed Material and retain an - indication of any previous modifications; and - - C. indicate the Licensed Material is licensed under this Public - License, and include the text of, or the URI or hyperlink to, - this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. b. ShareAlike.In addition to the - conditions in Section 3(a), if You Share Adapted Material You - produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - -Section 4 - Sui Generis Database Rights. 
- -Where the Licensed Rights include Sui Generis Database Rights that apply to -Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right to - extract, reuse, reproduce, and Share all or a substantial portion of - the contents of the database; - - b. if You include all or a substantial portion of the database contents - in a database in which You have Sui Generis Database Rights, then - the database in which You have Sui Generis Database Rights (but not - its individual contents) is Adapted Material, including for purposes - of Section 3(b); and - - c. You must comply with the conditions in Section 3(a) if You Share all - or a substantial portion of the contents of the database. - - For the avoidance of doubt, this Section 4 supplements and does not - replace Your obligations under this Public License where the Licensed - Rights include other Copyright and Similar Rights. - -Section 5 - Disclaimer of Warranties and Limitation of Liability. - - a. Unless otherwise separately undertaken by the Licensor, to the - extent possible, the Licensor offers the Licensed Material as-is and - as-available, and makes no representations or warranties of any kind - concerning the Licensed Material, whether express, implied, - statutory, or other. This includes, without limitation, warranties - of title, merchantability, fitness for a particular purpose, - non-infringement, absence of latent or other defects, accuracy, or - the presence or absence of errors, whether or not known or - discoverable. Where disclaimers of warranties are not allowed in - full or in part, this disclaimer may not apply to You. - - b. To the extent possible, in no event will the Licensor be liable to - You on any legal theory (including, without limitation, negligence) - or otherwise for any direct, special, indirect, incidental, - consequential, punitive, exemplary, or other losses, costs, - expenses, or damages arising out of this Public License or use of - the Licensed Material, even if the Licensor has been advised of the - possibility of such losses, costs, expenses, or damages. Where a - limitation of liability is not allowed in full or in part, this - limitation may not apply to You. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent possible, - most closely approximates an absolute disclaimer and waiver of all - liability. - -Section 6 - Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided it - is cured within 30 days of Your discovery of the violation; or - - 2. upon express reinstatement by the Licensor. - - c. For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations of - this Public License. - - d. For the avoidance of doubt, the Licensor may also offer the Licensed - Material under separate terms or conditions or stop distributing the - Licensed Material at any time; however, doing so will not terminate - this Public License. - - e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. - -Section 7 - Other Terms and Conditions. 
- - a. The Licensor shall not be bound by any additional or different terms - or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - -Section 8 - Interpretation. - - a. For the avoidance of doubt, this Public License does not, and shall - not be interpreted to, reduce, limit, restrict, or impose conditions - on any use of the Licensed Material that could lawfully be made - without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted as - a limitation upon, or waiver of, any privileges and immunities that - apply to the Licensor or You, including from the legal processes of - any jurisdiction or authority. - -Creative Commons is not a party to its public licenses. Notwithstanding, -Creative Commons may elect to apply one of its public licenses to material -it publishes and in those instances will be considered the "Licensor." The -text of the Creative Commons public licenses is dedicated to the public -domain under the CC0 Public Domain Dedication. Except for the limited -purpose of indicating that material is shared under a Creative Commons -public license or as otherwise permitted by the Creative Commons policies -published at creativecommons.org/policies, Creative Commons does not -authorize the use of the trademark "Creative Commons" or any other -trademark or logo of Creative Commons without its prior written consent -including, without limitation, in connection with any unauthorized -modifications to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For the -avoidance of doubt, this paragraph does not form part of the public -licenses. - -Creative Commons may be contacted at creativecommons.org. 
diff --git a/MAINTAINERS b/MAINTAINERS index 7f1399ac028e..e0eb8ec08789 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7351,15 +7351,16 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git S: Supported F: Documentation/networking/e100.rst F: Documentation/networking/e1000.rst -F: Documentation/networking/e1000e.txt -F: Documentation/networking/igb.txt -F: Documentation/networking/igbvf.txt -F: Documentation/networking/ixgb.txt -F: Documentation/networking/ixgbe.txt -F: Documentation/networking/ixgbevf.txt -F: Documentation/networking/i40e.txt -F: Documentation/networking/iavf.txt -F: Documentation/networking/ice.txt +F: Documentation/networking/e1000e.rst +F: Documentation/networking/fm10k.rst +F: Documentation/networking/igb.rst +F: Documentation/networking/igbvf.rst +F: Documentation/networking/ixgb.rst +F: Documentation/networking/ixgbe.rst +F: Documentation/networking/ixgbevf.rst +F: Documentation/networking/i40e.rst +F: Documentation/networking/iavf.rst +F: Documentation/networking/ice.rst F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h @@ -10160,7 +10161,6 @@ L: netdev@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git S: Maintained -F: net/core/flow.c F: net/xfrm/ F: net/key/ F: net/ipv4/xfrm* @@ -13100,7 +13100,7 @@ SELINUX SECURITY MODULE M: Paul Moore <paul@paul-moore.com> M: Stephen Smalley <sds@tycho.nsa.gov> M: Eric Paris <eparis@parisplace.org> -L: selinux@tycho.nsa.gov (moderated for non-subscribers) +L: selinux@vger.kernel.org W: https://selinuxproject.org W: https://github.com/SELinuxProject T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git @@ -15745,7 +15745,7 @@ F: include/linux/regulator/ VRF M: David Ahern <dsa@cumulusnetworks.com> -M: Shrijeet Mukherjee <shm@cumulusnetworks.com> +M: Shrijeet Mukherjee <shrijeet@gmail.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/vrf.c @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Merciless Moray # *DOCUMENTATION* diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 450c7a4fbc8a..cb094e55dc5f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, - /* ICC_ASGI1R */ - { CRm64(12), Op1( 1), is64, access_gic_sgi}, - /* ICC_SGI0R */ - { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. 
*/ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* ICC_SRE */ { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..b3354ff94e79 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). 
- */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f329b466e68f..2d14f17838d2 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info, r.gr[30] = get_parisc_stackpointer(); regs = &r; } - unwind_frame_init(info, task, &r); + unwind_frame_init(info, task, regs); } else { unwind_frame_init_from_blocked_task(info, task); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2fdc865ca374..2a2486526d1f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -114,7 +114,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -132,7 +132,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 09acf0ddec10..45b4bf1875e6 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -427,8 +427,9 @@ #define __NR_preadv2 358 #define __NR_pwritev2 359 #define __NR_statx 360 +#define __NR_io_pgetevents 361 -#define NR_syscalls 361 +#define NR_syscalls 362 /* Bitmask values returned from kern_features system call. 
*/ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index cc42225c20f3..4e8f56c3793c 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -115,8 +115,8 @@ static int auxio_probe(struct platform_device *dev) auxio_devtype = AUXIO_TYPE_SBUS; size = 1; } else { - printk("auxio: Unknown parent bus type [%pOFn]\n", - dp->parent); + printk("auxio: Unknown parent bus type [%s]\n", + dp->parent->name); return -ENODEV; } auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio"); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d3149baaa33c..67b3e6b3ce5d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/cpudata.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/sched/clock.h> #include <asm/nmi.h> #include <asm/pcr.h> #include <asm/cacheflush.h> @@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, @@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c index d941875dd718..92627abce311 100644 --- a/arch/sparc/kernel/power.c +++ b/arch/sparc/kernel/power.c @@ -41,8 +41,8 @@ static int power_probe(struct platform_device *op) power_reg = of_ioremap(res, 0, 0x4, "power"); - printk(KERN_INFO "%pOFn: Control reg 
at %llx\n", - op->dev.of_node, res->start); + printk(KERN_INFO "%s: Control reg at %llx\n", + op->dev.of_node->name, res->start); if (has_button_interrupt(irq, op->dev.of_node)) { if (request_irq(irq, diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 17c87d29ff20..b51cbb9e87dc 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -68,8 +68,8 @@ static void __init sparc32_path_component(struct device_node *dp, char *tmp_buf) return; regs = rprop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -84,8 +84,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) return; regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -104,13 +104,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; devfn = (regs->phys_hi >> 8) & 0xff; if (devfn & 0x07) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, devfn >> 3, devfn & 0x07); } else { - sprintf(tmp_buf, "%pOFn@%x", - dp, + sprintf(tmp_buf, "%s@%x", + dp->name, devfn >> 3); } } @@ -127,8 +127,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -167,8 +167,8 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) return; device = prop->value; - sprintf(tmp_buf, "%pOFn:%d:%d@%x,%x", - dp, *vendor, *device, + sprintf(tmp_buf, "%s:%d:%d@%x,%x", + dp->name, *vendor, *device, *intr, reg0); } @@ -201,7 +201,7 @@ char * __init build_path_component(struct device_node *dp) tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp); + strcpy(tmp_buf, dp->name); n = prom_early_alloc(strlen(tmp_buf) + 1); strcpy(n, tmp_buf); diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index 6220411ce8fc..baeaeed64993 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -82,8 +82,8 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) regs = rprop->value; if (!of_node_is_root(dp->parent)) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); return; @@ -97,17 +97,17 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) const char *prefix = (type == 0) ? 
"m" : "i"; if (low_bits) - sprintf(tmp_buf, "%pOFn@%s%x,%x", - dp, prefix, + sprintf(tmp_buf, "%s@%s%x,%x", + dp->name, prefix, high_bits, low_bits); else - sprintf(tmp_buf, "%pOFn@%s%x", - dp, + sprintf(tmp_buf, "%s@%s%x", + dp->name, prefix, high_bits); } else if (type == 12) { - sprintf(tmp_buf, "%pOFn@%x", - dp, high_bits); + sprintf(tmp_buf, "%s@%x", + dp->name, high_bits); } } @@ -122,8 +122,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; if (!of_node_is_root(dp->parent)) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); return; @@ -138,8 +138,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) if (tlb_type >= cheetah) mask = 0x7fffff; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, *(u32 *)prop->value, (unsigned int) (regs->phys_addr & mask)); } @@ -156,8 +156,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) return; regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs->which_io, regs->phys_addr); } @@ -176,13 +176,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; devfn = (regs->phys_hi >> 8) & 0xff; if (devfn & 0x07) { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, devfn >> 3, devfn & 0x07); } else { - sprintf(tmp_buf, "%pOFn@%x", - dp, + sprintf(tmp_buf, "%s@%x", + dp->name, devfn >> 3); } } @@ -203,8 +203,8 @@ static void __init upa_path_component(struct device_node *dp, char *tmp_buf) if (!prop) return; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, *(u32 *) prop->value, (unsigned int) (regs->phys_addr & 0xffffffffUL)); } @@ -221,7 +221,7 @@ static void __init vdev_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x", dp, *regs); + sprintf(tmp_buf, "%s@%x", dp->name, *regs); } /* "name@addrhi,addrlo" */ @@ -236,8 +236,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, + sprintf(tmp_buf, "%s@%x,%x", + dp->name, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); } @@ -257,8 +257,8 @@ static void __init i2c_path_component(struct device_node *dp, char *tmp_buf) /* This actually isn't right... should look at the #address-cells * property of the i2c bus node etc. etc. 
*/ - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, regs[0], regs[1]); + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs[0], regs[1]); } /* "name@reg0[,reg1]" */ @@ -274,11 +274,11 @@ static void __init usb_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; if (prop->length == sizeof(u32) || regs[1] == 1) { - sprintf(tmp_buf, "%pOFn@%x", - dp, regs[0]); + sprintf(tmp_buf, "%s@%x", + dp->name, regs[0]); } else { - sprintf(tmp_buf, "%pOFn@%x,%x", - dp, regs[0], regs[1]); + sprintf(tmp_buf, "%s@%x,%x", + dp->name, regs[0], regs[1]); } } @@ -295,11 +295,11 @@ static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf regs = prop->value; if (regs[2] || regs[3]) { - sprintf(tmp_buf, "%pOFn@%08x%08x,%04x%08x", - dp, regs[0], regs[1], regs[2], regs[3]); + sprintf(tmp_buf, "%s@%08x%08x,%04x%08x", + dp->name, regs[0], regs[1], regs[2], regs[3]); } else { - sprintf(tmp_buf, "%pOFn@%08x%08x", - dp, regs[0], regs[1]); + sprintf(tmp_buf, "%s@%08x%08x", + dp->name, regs[0], regs[1]); } } @@ -361,7 +361,7 @@ char * __init build_path_component(struct device_node *dp) tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp); + strcpy(tmp_buf, dp->name); n = prom_early_alloc(strlen(tmp_buf) + 1); strcpy(n, tmp_buf); diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index f6528884a2c8..4073e2b87dd0 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -84,8 +84,9 @@ __handle_signal: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 + andn %l1, %l4, %l1 ba,pt %xcc, __handle_preemption_continue - andn %l1, %l4, %l1 + srl %l4, 20, %l4 /* When returning from a NMI (%pil==15) interrupt we want to * avoid running softirqs, doing IRQ tracing, preempting, etc. 
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 12bee14b552c..621a363098ec 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -90,4 +90,4 @@ sys_call_table: /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen /*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .long sys_statx +/*360*/ .long sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 387ef993880a..bb68c805b891 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -91,7 +91,7 @@ sys_call_table32: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, compat_sys_io_pgetevents #endif /* CONFIG_COMPAT */ @@ -173,4 +173,4 @@ sys_call_table: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, sys_io_pgetevents diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index 3feb3d960ca5..75dca9aab737 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -33,9 +33,19 @@ #define TICK_PRIV_BIT (1ULL << 63) #endif +#ifdef CONFIG_SPARC64 #define SYSCALL_STRING \ "ta 0x6d;" \ - "sub %%g0, %%o0, %%o0;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif #define SYSCALL_CLOBBERS \ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index f51595f861b8..5eaff3c1aa0c 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -262,7 +262,9 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); + if (err) + return err; vdso_enabled = val; - return err; + return 0; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 28764dacf018..466f66c8a7f8 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2767c625a52c..fbbf1ba57ec6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -389,6 +389,13 @@ * that register for the time this macro runs */ + /* + * The high bits of the CS dword (__csh) are used for + * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case + * hardware didn't do this for us. + */ + andl $(0x0000ffff), PT_CS(%esp) + /* Are we on the entry stack? Bail out if not! 
*/ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -407,12 +414,6 @@ /* Load top of task-stack into %edi */ movl TSS_entry2task_stack(%edi), %edi - /* - * Clear unused upper bits of the dword containing the word-sized CS - * slot in pt_regs in case hardware didn't clear it for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Special case - entry from kernel mode via entry stack */ #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 957dfb693ecc..f95dcb209fdf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry) xorl %ebx, %ebx 1: + /* + * Always stash CR3 in %r14. This value will be restored, + * verbatim, at exit. Needed if paranoid_entry interrupted + * another entry that already switched to the user CR3 value + * but has not yet returned to userspace. + * + * This is also why CS (stashed in the "iret frame" by the + * hardware at entry) can not be used: this may be a return + * to kernel code, but with a user CR3 value. + */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret @@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel @@ -1626,6 +1638,7 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 testl %ebx, %ebx /* swapgs needed? 
*/ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e9202a0de8f0..1a19d11cfbbd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b64acb08a62b..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b6..61a949d84dfa 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. 
*/ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 661583662430..71c0b01d93b1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override, int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; -#endif /* * If SME is active then swiotlb will be set to 1 so that bounce diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be01328eb755..fddaefc51fb6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include <asm/time.h> #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b52bd2b6cdb4..6d5dc5dabfd7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -58,7 +58,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -75,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -104,7 +104,7 @@ void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d96092b35936..61ccfb13899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) static inline bool svm_sev_enabled(void) { - return max_sev_asid; + return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; } static inline bool sev_guest(struct kvm *kvm) { +#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; +#else + return false; +#endif } static inline int sev_get_asid(struct kvm *kvm) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 612fd17be635..e665aa7167cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1572,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm) goto out; } + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the + * base of EPT PML4 table, strip off EPT configuration information. 
+ */ ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); diff --git a/block/blk-lib.c b/block/blk-lib.c index d1b9dd03da25..bbd44666f2b5 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -29,9 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, { struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; - unsigned int granularity; unsigned int op; - int alignment; sector_t bs_mask; if (!q) @@ -54,38 +52,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if ((sector | nr_sects) & bs_mask) return -EINVAL; - /* Zero-sector (unknown) and one-sector granularities are the same. */ - granularity = max(q->limits.discard_granularity >> 9, 1U); - alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; - while (nr_sects) { - unsigned int req_sects; - sector_t end_sect, tmp; + unsigned int req_sects = nr_sects; + sector_t end_sect; - /* - * Issue in chunks of the user defined max discard setting, - * ensuring that bi_size doesn't overflow - */ - req_sects = min_t(sector_t, nr_sects, - q->limits.max_discard_sectors); if (!req_sects) goto fail; if (req_sects > UINT_MAX >> 9) req_sects = UINT_MAX >> 9; - /* - * If splitting a request, and the next starting sector would be - * misaligned, stop the discard at the previous aligned sector. - */ end_sect = sector + req_sects; - tmp = end_sect; - if (req_sects < nr_sects && - sector_div(tmp, granularity) != alignment) { - end_sect = end_sect - alignment; - sector_div(end_sect, granularity); - end_sect = end_sect * granularity + alignment; - req_sects = end_sect - sector; - } bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8e20a0677dcf..8ac93fcbaa2e 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -310,6 +310,7 @@ static void scale_up(struct rq_wb *rwb) rq_depth_scale_up(&rwb->rq_depth); calc_wb_limits(rwb); rwb->unknown_cnt = 0; + rwb_wake_all(rwb); rwb_trace_step(rwb, "scale up"); } @@ -318,7 +319,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle) rq_depth_scale_down(&rwb->rq_depth, hard_throttle); calc_wb_limits(rwb); rwb->unknown_cnt = 0; - rwb_wake_all(rwb); rwb_trace_step(rwb, "scale down"); } diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 6470e3c4c990..f8c703426c90 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -241,7 +241,8 @@ static void __iomem *eni_alloc_mem(struct eni_dev *eni_dev, unsigned long *size) len = eni_dev->free_len; if (*size < MID_MIN_BUF_SIZE) *size = MID_MIN_BUF_SIZE; if (*size > MID_MAX_BUF_SIZE) return NULL; - for (order = 0; (1 << order) < *size; order++); + for (order = 0; (1 << order) < *size; order++) + ; DPRINTK("trying: %ld->%d\n",*size,order); best_order = 65; /* we don't have more than 2^64 of anything ... 
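The eni.c loop above only gains a line break, but the idiom is worth spelling out: it computes the smallest power-of-two order that covers *size, and moving the empty body onto its own line makes it visibly intentional. A standalone illustration with a hypothetical size:

#include <stdio.h>

int main(void)
{
	unsigned long size = 3000; /* hypothetical buffer size */
	int order;

	/* smallest order with (1 << order) >= size; the empty body
	 * sits on its own line, as in the fixed driver loop */
	for (order = 0; (1UL << order) < size; order++)
		;
	printf("order=%d -> %lu bytes\n", order, 1UL << order);
	return 0;
}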
*/ index = 0; /* silence GCC */ diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index e89146ddede6..d5c76b50d357 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -126,7 +126,7 @@ static unsigned long dummy[2] = {0,0}; #define zin_n(r) inl(zatm_dev->base+r*4) #define zin(r) inl(zatm_dev->base+uPD98401_##r*4) #define zout(v,r) outl(v,zatm_dev->base+uPD98401_##r*4) -#define zwait while (zin(CMR) & uPD98401_BUSY) +#define zwait() do {} while (zin(CMR) & uPD98401_BUSY) /* RX0, RX1, TX0, TX1 */ static const int mbx_entries[NR_MBX] = { 1024,1024,1024,1024 }; @@ -140,7 +140,7 @@ static const int mbx_esize[NR_MBX] = { 16,16,4,4 }; /* entry size in bytes */ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) { - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -149,10 +149,10 @@ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) static u32 zpeekl(struct zatm_dev *zatm_dev,u32 addr) { - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | uPD98401_IA_RW | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER); } @@ -241,7 +241,7 @@ static void refill_pool(struct atm_dev *dev,int pool) } if (first) { spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(virt_to_bus(first),CER); zout(uPD98401_ADD_BAT | (pool << uPD98401_POOL_SHIFT) | count, CMR); @@ -508,9 +508,9 @@ static int open_rx_first(struct atm_vcc *vcc) } if (zatm_vcc->pool < 0) return -EMSGSIZE; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -571,21 +571,21 @@ static void close_rx(struct atm_vcc *vcc) pos = vcc->vci >> 1; shift = (1-(vcc->vci & 1)) << 4; zpokel(zatm_dev,zpeekl(zatm_dev,pos) & ~(0xffff << shift),pos); - zwait; + zwait(); zout(uPD98401_NOP,CMR); - zwait; + zwait(); zout(uPD98401_NOP,CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); } spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); udelay(10); /* why oh why ... ? 
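The zwait() rework above is the standard cure for statement-like macros: the old #define zwait while (...) parsed only because every call site wrote zwait; and a bare zwait would silently adopt the following statement as its loop body. A compilable sketch, with a volatile flag standing in for the uPD98401 busy bit:

#include <stdio.h>

static volatile int busy; /* stand-in for zin(CMR) & uPD98401_BUSY */

/* explicit empty body: usable only as a statement, and a missing
 * semicolon at the call site now fails to compile */
#define zwait() do { } while (busy)

int main(void)
{
	zwait(); /* returns immediately once busy == 0 */
	printf("controller idle\n");
	return 0;
}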
*/ zout(uPD98401_CLOSE_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close RX channel " "%d\n",vcc->dev->number,zatm_vcc->rx_chan); @@ -699,7 +699,7 @@ printk("NONONONOO!!!!\n"); skb_queue_tail(&zatm_vcc->tx_queue,skb); DPRINTK("QRP=0x%08lx\n",zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+ uPD98401_TXVC_QRP)); - zwait; + zwait(); zout(uPD98401_TX_READY | (zatm_vcc->tx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -891,12 +891,12 @@ static void close_tx(struct atm_vcc *vcc) } spin_lock_irqsave(&zatm_dev->lock, flags); #if 0 - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); #endif - zwait; + zwait(); zout(uPD98401_CLOSE_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close TX channel " "%d\n",vcc->dev->number,chan); @@ -926,9 +926,9 @@ static int open_tx_first(struct atm_vcc *vcc) zatm_vcc->tx_chan = 0; if (vcc->qos.txtp.traffic_class == ATM_NONE) return 0; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -1557,7 +1557,7 @@ static void zatm_phy_put(struct atm_dev *dev,unsigned char value, struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -1569,10 +1569,10 @@ static unsigned char zatm_phy_get(struct atm_dev *dev,unsigned long addr) struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | uPD98401_IA_RW | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER) & 0xff; } diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5ca56bfae63c..f68e9baffad7 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -36,6 +36,10 @@ MODULE_VERSION(DRV_MODULE_VERSION); #define VDC_TX_RING_SIZE 512 #define VDC_DEFAULT_BLK_SIZE 512 +#define MAX_XFER_BLKS (128 * 1024) +#define MAX_XFER_SIZE (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE) +#define MAX_RING_COOKIES ((MAX_XFER_BLKS / PAGE_SIZE) + 2) + #define WAITING_FOR_LINK_UP 0x01 #define WAITING_FOR_TX_SPACE 0x02 #define WAITING_FOR_GEN_CMD 0x04 @@ -450,7 +454,7 @@ static int __send_request(struct request *req) { struct vdc_port *port = req->rq_disk->private_data; struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - struct scatterlist sg[port->ring_cookies]; + struct scatterlist sg[MAX_RING_COOKIES]; struct vdc_req_entry *rqe; struct vio_disk_desc *desc; unsigned int map_perm; @@ -458,6 +462,9 @@ static int __send_request(struct request *req) u64 len; u8 op; + if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES)) + return -EINVAL; + map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; if (rq_data_dir(req) == READ) { @@ -984,9 +991,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto err_out_free_port; port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE; - port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size); - port->ring_cookies = ((port->max_xfer_size * - port->vdisk_block_size) / PAGE_SIZE) + 2; + 
port->max_xfer_size = MAX_XFER_SIZE; + port->ring_cookies = MAX_RING_COOKIES; err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port); if (err) goto err_out_free_port; diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 20142bc77554..282d1af1d3ba 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -293,13 +293,17 @@ static int btsdio_probe(struct sdio_func *func, tuple = tuple->next; } - /* BCM43341 devices soldered onto the PCB (non-removable) use an - * uart connection for bluetooth, ignore the BT SDIO interface. + /* Broadcom devices soldered onto the PCB (non-removable) use a + * UART connection for Bluetooth, so ignore the BT SDIO interface. */ if (func->vendor == SDIO_VENDOR_ID_BROADCOM && - func->device == SDIO_DEVICE_ID_BROADCOM_43341 && - !mmc_card_is_removable(func->card->host)) - return -ENODEV; + !mmc_card_is_removable(func->card->host)) { + switch (func->device) { + case SDIO_DEVICE_ID_BROADCOM_43341: + case SDIO_DEVICE_ID_BROADCOM_43430: + return -ENODEV; + } + } data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL); if (!data) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 61cde1a7ec1b..7439a7eb50ac 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -264,6 +264,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ + { USB_DEVICE(0x0cf3, 0x535b), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME }, diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9f1392fc7105..f036c8f98ea3 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -40,6 +40,7 @@ #include <linux/platform_device.h> #include <linux/regulator/consumer.h> #include <linux/serdev.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -63,6 +64,9 @@ /* susclk rate */ #define SUSCLK_RATE_32KHZ 32768 +/* Controller debug log header */ +#define QCA_DEBUG_HANDLE 0x2EDC + /* HCI_IBS transmit side sleep protocol states */ enum tx_ibs_states { HCI_IBS_TX_ASLEEP, @@ -849,6 +853,19 @@ static int qca_ibs_wake_ack(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb) +{ + /* We receive debug logs from the chip as ACL packets. + * Instead of sending them down the ACL path to be + * decoded, we push them to the upper layers + * as diagnostic packets.
+ */ + if (get_unaligned_le16(skb->data) == QCA_DEBUG_HANDLE) + return hci_recv_diag(hdev, skb); + + return hci_recv_frame(hdev, skb); +} + #define QCA_IBS_SLEEP_IND_EVENT \ .type = HCI_IBS_SLEEP_IND, \ .hlen = 0, \ @@ -871,7 +888,7 @@ static int qca_ibs_wake_ack(struct hci_dev *hdev, struct sk_buff *skb) .maxlen = HCI_MAX_IBS_SIZE static const struct h4_recv_pkt qca_recv_pkts[] = { - { H4_RECV_ACL, .recv = hci_recv_frame }, + { H4_RECV_ACL, .recv = qca_recv_acl_data }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind }, diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c index ffa5dac221e4..129ebd2588fd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c +++ b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c @@ -1434,8 +1434,16 @@ static void __init sun4i_ccu_init(struct device_node *node, return; } - /* Force the PLL-Audio-1x divider to 1 */ val = readl(reg + SUN4I_PLL_AUDIO_REG); + + /* + * Force VCO and PLL bias current to lowest setting. Higher + * settings interfere with sigma-delta modulation and result + * in audible noise and distortions when using SPDIF or I2S. + */ + val &= ~GENMASK(25, 16); + + /* Force the PLL-Audio-1x divider to 1 */ val &= ~GENMASK(29, 26); writel(val | (1 << 26), reg + SUN4I_PLL_AUDIO_REG); diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 04f277cade7c..62249d4ed373 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -237,9 +237,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) static int __init chcr_crypto_init(void) { - if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) - pr_err("ULD register fail: No chcr crypto support in cxgb4\n"); - + cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info); return 0; } diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 018fcdb353d2..281cf9cbb44c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -174,6 +174,11 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->crtcs[i].state = NULL; state->crtcs[i].old_state = NULL; state->crtcs[i].new_state = NULL; + + if (state->crtcs[i].commit) { + drm_crtc_commit_put(state->crtcs[i].commit); + state->crtcs[i].commit = NULL; + } } for (i = 0; i < config->num_total_plane; i++) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 80be74df7ba6..1bb4c318bdd4 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1408,15 +1408,16 @@ EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks); void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev, struct drm_atomic_state *old_state) { - struct drm_crtc_state *new_crtc_state; struct drm_crtc *crtc; int i; - for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i) { - struct drm_crtc_commit *commit = new_crtc_state->commit; + for (i = 0; i < dev->mode_config.num_crtc; i++) { + struct drm_crtc_commit *commit = old_state->crtcs[i].commit; int ret; - if (!commit) + crtc = old_state->crtcs[i].ptr; + + if (!crtc || !commit) continue; ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ); @@ -1934,6 +1935,9 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, drm_crtc_commit_get(commit); commit->abort_completion = true; + + state->crtcs[i].commit = commit; + drm_crtc_commit_get(commit); } for_each_oldnew_connector_in_state(state, 
conn, old_conn_state, new_conn_state, i) { diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index bae43938c8f6..9cbe8f5c9aca 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -567,9 +567,9 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_crtc *crtc_req = data; struct drm_crtc *crtc; struct drm_plane *plane; - struct drm_connector **connector_set = NULL, *connector; - struct drm_framebuffer *fb = NULL; - struct drm_display_mode *mode = NULL; + struct drm_connector **connector_set, *connector; + struct drm_framebuffer *fb; + struct drm_display_mode *mode; struct drm_mode_set set; uint32_t __user *set_connectors_ptr; struct drm_modeset_acquire_ctx ctx; @@ -598,6 +598,10 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, mutex_lock(&crtc->dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: + connector_set = NULL; + fb = NULL; + mode = NULL; + ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx); if (ret) goto out; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3c9fc99648b7..ff0bfc65a8c1 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -113,6 +113,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ + { "BOE", 0x78b, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, @@ -4279,7 +4282,7 @@ static void drm_parse_ycbcr420_deep_color_info(struct drm_connector *connector, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; dc_mask = db[7] & DRM_EDID_YCBCR420_DC_MASK; - hdmi->y420_dc_modes |= dc_mask; + hdmi->y420_dc_modes = dc_mask; } static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 515a7aec57ac..9628dd617826 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1580,6 +1580,25 @@ unlock: } EXPORT_SYMBOL(drm_fb_helper_ioctl); +static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, + const struct fb_var_screeninfo *var_2) +{ + return var_1->bits_per_pixel == var_2->bits_per_pixel && + var_1->grayscale == var_2->grayscale && + var_1->red.offset == var_2->red.offset && + var_1->red.length == var_2->red.length && + var_1->red.msb_right == var_2->red.msb_right && + var_1->green.offset == var_2->green.offset && + var_1->green.length == var_2->green.length && + var_1->green.msb_right == var_2->green.msb_right && + var_1->blue.offset == var_2->blue.offset && + var_1->blue.length == var_2->blue.length && + var_1->blue.msb_right == var_2->blue.msb_right && + var_1->transp.offset == var_2->transp.offset && + var_1->transp.length == var_2->transp.length && + var_1->transp.msb_right == var_2->transp.msb_right; +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1590,7 +1609,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - int depth; if (var->pixclock != 0 || in_dbg_master()) return -EINVAL; @@ -1610,72 +1628,15 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 
16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: + /* + * drm fbdev emulation doesn't support changing the pixel format at all, + * so reject all pixel format changing requests. + */ + if (!drm_fb_pixel_format_equal(var, &info->var)) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel format\n"); return -EINVAL; } + return 0; } EXPORT_SYMBOL(drm_fb_helper_check_var); diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index e36004fbe453..2a15f2f9271e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -81,9 +81,19 @@ static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate, int i; for (i = tcon->dclk_min_div; i <= tcon->dclk_max_div; i++) { - unsigned long ideal = rate * i; + u64 ideal = (u64)rate * i; unsigned long rounded; + /* + * ideal has overflowed the max value that can be stored in an + * unsigned long, and every clk operation we might do on a + * truncated u64 value will give us incorrect results. + * Let's just stop there since bigger dividers will result in + * the same overflow issue. + */ + if (ideal > ULONG_MAX) + goto out; + rounded = clk_hw_round_rate(clk_hw_get_parent(hw), ideal); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9ee9a15e7134..9200e349f29e 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2270,7 +2270,7 @@ EXPORT_SYMBOL(i2c_put_adapter); * * Return: NULL if a DMA safe buffer was not obtained. Use msg->buf with PIO. * Or a valid pointer to be used with DMA. After use, release it by - * calling i2c_release_dma_safe_msg_buf(). + * calling i2c_put_dma_safe_msg_buf(). * * This function must only be called from process context! 
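The renamed helper pairs with i2c_get_dma_safe_msg_buf(); a hedged kernel-style sketch of the documented usage follows, where do_pio_xfer() and do_dma_xfer() are hypothetical driver callbacks rather than kernel API:

/* sketch only: obtain a DMA-safe buffer for msg (bouncing if needed),
 * fall back to PIO when none is available, and release it with the
 * renamed helper, copying data back only if the transfer succeeded */
static int xfer_one_msg(struct i2c_adapter *adap, struct i2c_msg *msg)
{
	u8 *buf = i2c_get_dma_safe_msg_buf(msg, 8); /* 8-byte threshold */
	int ret;

	if (!buf)
		return do_pio_xfer(adap, msg);

	ret = do_dma_xfer(adap, buf, msg->len);
	i2c_put_dma_safe_msg_buf(buf, msg, ret == 0);
	return ret;
}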
*/ diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index faa9e6116b2f..73332b9a25b5 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/nospec.h> + #include <linux/uaccess.h> #include <rdma/ib.h> @@ -1120,6 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 21863ddde63e..01d68ed46c1b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include <linux/module.h> #include <linux/nsproxy.h> +#include <linux/nospec.h> + #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> @@ -1676,6 +1678,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 088205d7f1a1..cca1820802b8 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -393,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { - mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf); + mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); } static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, @@ -728,16 +728,11 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, int nent, int cqe_size) { - struct mlx5_frag_buf_ctrl *c = &buf->fbc; - struct mlx5_frag_buf *frag_buf = &c->frag_buf; - u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0}; + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); + u8 log_wq_sz = ilog2(cqe_size); int err; - MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size)); - MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 
1 : 0); - - mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff); - err = mlx5_frag_buf_alloc_node(dev->mdev, nent * cqe_size, frag_buf, @@ -745,6 +740,8 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, if (err) return err; + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + buf->cqe_size = cqe_size; buf->nent = nent; @@ -934,7 +931,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * - cq->buf.fbc.frag_buf.npages; + cq->buf.frag_buf.npages; *cqb = kvzalloc(*inlen, GFP_KERNEL); if (!*cqb) { err = -ENOMEM; @@ -942,11 +939,11 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, } pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); - mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas); + mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); MLX5_SET(cqc, cqc, log_page_size, - cq->buf.fbc.frag_buf.page_shift - + cq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); *index = dev->mdev->priv.uar->index; @@ -1365,11 +1362,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) cqe_size = 64; err = resize_kernel(dev, cq, entries, cqe_size); if (!err) { - struct mlx5_frag_buf_ctrl *c; + struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf; - c = &cq->resize_buf->fbc; - npas = c->frag_buf.npages; - page_shift = c->frag_buf.page_shift; + npas = frag_buf->npages; + page_shift = frag_buf->page_shift; } } @@ -1390,8 +1386,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, pas, 0); else - mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf, - pas); + mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas); MLX5_SET(modify_cq_in, in, modify_field_select_resize_field_select.resize_field_select.resize_field_select, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index f2f11e652dcd..66dc337e49a7 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -284,7 +284,7 @@ static bool devx_is_obj_create_cmd(const void *in) case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: @@ -627,9 +627,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); break; - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5d9b7f62a0ba..af32899bb72a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2793,7 +2793,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, return -EINVAL; action->flow_tag = ib_spec->flow_tag.tag_id; - action->has_flow_tag = true; + action->flags |= FLOW_ACT_HAS_TAG; break; case IB_FLOW_SPEC_ACTION_DROP: if 
(FIELDS_NOT_SUPPORTED(ib_spec->drop, @@ -2886,7 +2886,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; return is_crypto && is_ipsec && - (!egress || (!is_drop && !flow_act->has_flow_tag)) ? + (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? VALID_SPEC_VALID : VALID_SPEC_INVALID; } @@ -3320,15 +3320,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + struct mlx5_ib_mcounters *mcounters; + err = flow_counters_set_data(flow_act.counters, ucmd); if (err) goto free; + mcounters = to_mcounters(flow_act.counters); handler->ibcounters = flow_act.counters; dest_arr[dest_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter = - to_mcounters(flow_act.counters)->hw_cntrs_hndl; + dest_arr[dest_num].counter_id = + mlx5_fc_id(mcounters->hw_cntrs_hndl); dest_num++; } @@ -3346,7 +3349,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } - if (flow_act.has_flow_tag && + if ((flow_act.flags & FLOW_ACT_HAS_TAG) && (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n", diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 320d4dfe8c2f..289c18db2611 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -435,6 +435,7 @@ struct mlx5_ib_qp { struct mlx5_ib_cq_buf { struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; struct ib_umem *umem; int cqe_size; int nent; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6cba2a02d11b..daf1eb84cd31 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1279,7 +1279,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (dev->rep) MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); @@ -1582,7 +1582,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, create_tir: if (dev->rep) MLX5_SET(tirc, tirc, self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index f5ae24865355..b0f9d19b3410 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1346,6 +1346,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ee28ec9e0aba..ffa37adb7681 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -963,7 +963,8 @@ static inline void slave_disable_netpoll(struct slave *slave) return; slave->np = NULL; - __netpoll_free_async(np); + + __netpoll_free(np); } static void bond_poll_controller(struct net_device *bond_dev) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3017ecf82ca5..2eb68769562c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ 
-1190,16 +1190,14 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int bcm_sf2_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = dev_get_drvdata(dev); return dsa_switch_suspend(priv->dev->ds); } static int bcm_sf2_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + struct bcm_sf2_priv *priv = dev_get_drvdata(dev); return dsa_switch_resume(priv->dev->ds); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 78ce820b5257..e05d4eddc935 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2907,7 +2907,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3528,7 +3528,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 92945841c8e8..cd7db60a508b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -228,8 +228,11 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000; break; case 2500: - ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | - MV88E6390_PORT_MAC_CTL_ALTSPEED; + if (alt_bit) + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | + MV88E6390_PORT_MAC_CTL_ALTSPEED; + else + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000; break; case 10000: /* all bits set, fall through... */ @@ -291,6 +294,24 @@ int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) return mv88e6xxx_port_set_speed(chip, port, speed, false, false); } +/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6341) */ +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 5 ? 1000 : 2500; + + if (speed > 2500) + return -EOPNOTSUPP; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed == 2500 && port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, !port, true); +} + /* Support 10, 100, 200, 1000 Mbps (e.g. 
88E6352 family) */ int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) { diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index f32f56af8e35..36904c9bf955 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -269,6 +269,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index cdcde7f8e0b2..7e97e620bd44 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -955,8 +955,7 @@ qca8k_set_pm(struct qca8k_priv *priv, int enable) static int qca8k_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct qca8k_priv *priv = platform_get_drvdata(pdev); + struct qca8k_priv *priv = dev_get_drvdata(dev); qca8k_set_pm(priv, 0); @@ -965,8 +964,7 @@ static int qca8k_suspend(struct device *dev) static int qca8k_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct qca8k_priv *priv = platform_get_drvdata(pdev); + struct qca8k_priv *priv = dev_get_drvdata(dev); qca8k_set_pm(priv, 1); diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig index 99b30353541a..9e87d7b8360f 100644 --- a/drivers/net/ethernet/amazon/Kconfig +++ b/drivers/net/ethernet/amazon/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_AMAZON config ENA_ETHERNET tristate "Elastic Network Adapter (ENA) support" - depends on (PCI_MSI && X86) + depends on PCI_MSI && !CPU_BIG_ENDIAN ---help--- This driver supports Elastic Network Adapter (ENA)" diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 284a0a612131..18956e7604a3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3022,20 +3022,10 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, int io_sq_num, io_queue_num; /* In case of LLQ use the llq number in the get feature cmd */ - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num; - - if (io_sq_num == 0) { - dev_err(&pdev->dev, - "Trying to use LLQ but llq_num is 0. 
Fall back into regular queues\n"); - - ena_dev->tx_mem_queue_type = - ENA_ADMIN_PLACEMENT_POLICY_HOST; - io_sq_num = get_feat_ctx->max_queues.max_sq_num; - } - } else { + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + io_sq_num = get_feat_ctx->llq.max_llq_num; + else io_sq_num = get_feat_ctx->max_queues.max_sq_num; - } io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); io_queue_num = min_t(int, io_queue_num, io_sq_num); @@ -3238,7 +3228,7 @@ static int ena_calc_queue_size(struct pci_dev *pdev, if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_legacy_llq_depth); + get_feat_ctx->llq.max_llq_depth); queue_size = rounddown_pow_of_two(queue_size); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index c0568465e10b..096ca5730887 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -279,7 +279,7 @@ static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac) return err; } -int aq_fw2x_update_stats(struct aq_hw_s *self) +static int aq_fw2x_update_stats(struct aq_hw_s *self) { int err = 0; u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 0fe57e36912b..498b373c992d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1471,7 +1471,7 @@ struct bnxt { struct rx_port_stats *hw_rx_port_stats; struct tx_port_stats *hw_tx_port_stats; struct rx_port_stats_ext *hw_rx_port_stats_ext; - struct rx_port_stats_ext *hw_tx_port_stats_ext; + struct tx_port_stats_ext *hw_tx_port_stats_ext; dma_addr_t hw_rx_port_stats_map; dma_addr_t hw_tx_port_stats_map; dma_addr_t hw_rx_port_stats_ext_map; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index b756fc79424e..35564a8a48f9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -320,9 +320,12 @@ int bcmgenet_mii_probe(struct net_device *dev) phydev->advertising = phydev->supported; /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fall back to polling for + * those versions of GENET.
*/ - if (priv->internal_phy) + if (priv->internal_phy && !GENET_IS_V5(priv)) dev->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 0acaef3ef548..8f5bf9166c11 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4156,8 +4156,7 @@ static int macb_remove(struct platform_device *pdev) static int __maybe_unused macb_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *netdev = platform_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); netif_carrier_off(netdev); @@ -4179,8 +4178,7 @@ static int __maybe_unused macb_suspend(struct device *dev) static int __maybe_unused macb_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *netdev = platform_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); if (bp->wol & MACB_WOL_ENABLED) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 4bc211093c98..267322693ed5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -702,15 +702,14 @@ static void uld_attach(struct adapter *adap, unsigned int uld) * about any presently available devices that support its type. Returns * %-EBUSY if a ULD of the same type is already registered. */ -int cxgb4_register_uld(enum cxgb4_uld type, - const struct cxgb4_uld_info *p) +void cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) { int ret = 0; - unsigned int adap_idx = 0; struct adapter *adap; if (type >= CXGB4_ULD_MAX) - return -EINVAL; + return; mutex_lock(&uld_mutex); list_for_each_entry(adap, &adapter_list, list_node) { @@ -733,52 +732,29 @@ int cxgb4_register_uld(enum cxgb4_uld type, } if (adap->flags & FULL_INIT_DONE) enable_rx_uld(adap, type); - if (adap->uld[type].add) { - ret = -EBUSY; + if (adap->uld[type].add) goto free_irq; - } ret = setup_sge_txq_uld(adap, type, p); if (ret) goto free_irq; adap->uld[type] = *p; uld_attach(adap, type); - adap_idx++; - } - mutex_unlock(&uld_mutex); - return 0; - + continue; free_irq: - if (adap->flags & FULL_INIT_DONE) - quiesce_rx_uld(adap, type); - if (adap->flags & USING_MSIX) - free_msix_queue_irqs_uld(adap, type); -free_rxq: - free_sge_queues_uld(adap, type); -free_queues: - free_queues_uld(adap, type); -out: - - list_for_each_entry(adap, &adapter_list, list_node) { - if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || - (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) - continue; - if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) - continue; - if (!adap_idx) - break; - adap->uld[type].handle = NULL; - adap->uld[type].add = NULL; - release_sge_txq_uld(adap, type); if (adap->flags & FULL_INIT_DONE) quiesce_rx_uld(adap, type); if (adap->flags & USING_MSIX) free_msix_queue_irqs_uld(adap, type); +free_rxq: free_sge_queues_uld(adap, type); +free_queues: free_queues_uld(adap, type); - adap_idx--; +out: + dev_warn(adap->pdev_dev, + "ULD registration failed for uld type %d\n", type); } mutex_unlock(&uld_mutex); - return ret; + return; } EXPORT_SYMBOL(cxgb4_register_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index de9ad311dacd..5fa9a2d5fc4b 100644 --- 
a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -384,7 +384,7 @@ struct cxgb4_uld_info { int (*tx_handler)(struct sk_buff *skb, struct net_device *dev); }; -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); +void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); int cxgb4_immdata_send(struct net_device *dev, unsigned int idx, diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 50222b7b81f3..0a82fcf16d35 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1722,8 +1722,7 @@ out: static int dm9000_drv_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct board_info *db; if (ndev) { @@ -1745,8 +1744,7 @@ dm9000_drv_suspend(struct device *dev) static int dm9000_drv_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct board_info *db = netdev_priv(ndev); if (ndev) { diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 4778b663653e..bf80855dd0dd 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -452,6 +452,10 @@ struct bufdesc_ex { * initialisation. */ #define FEC_QUIRK_MIB_CLEAR (1 << 15) +/* Only the i.MX25/i.MX27/i.MX28 controllers support the FRBR and FRSR + * registers; those FIFO receive registers are reserved on other platforms.
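Capability bits such as the FEC_QUIRK_HAS_FRREG flag defined just below follow a common driver pattern: per-SoC features are ORed into driver_data once and tested before any optional registers are touched. A self-contained model (flag names mimic the patch; the values are only illustrative):

#include <stdio.h>

#define QUIRK_MIB_CLEAR (1 << 15)
#define QUIRK_HAS_FRREG (1 << 16)

int main(void)
{
	unsigned long quirks = QUIRK_MIB_CLEAR; /* e.g. an SoC without FRBR/FRSR */

	if (quirks & QUIRK_HAS_FRREG)
		printf("dump FRBR/FRSR\n");
	else
		printf("skip reserved FIFO registers\n"); /* avoids faulting reads */
	return 0;
}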
+ */ +#define FEC_QUIRK_HAS_FRREG (1 << 16) struct bufdesc_prop { int qid; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a17cc973d9a3..6db69ba30dcd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -91,14 +91,16 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx27-fec", - .driver_data = FEC_QUIRK_MIB_CLEAR, + .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_FRREG, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | @@ -2162,7 +2164,13 @@ static void fec_enet_get_regs(struct net_device *ndev, memset(buf, 0, regs->len); for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { - off = fec_enet_register_offset[i] / 4; + off = fec_enet_register_offset[i]; + + if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && + !(fep->quirks & FEC_QUIRK_HAS_FRREG)) + continue; + + off >>= 2; buf[off] = readl(&theregs[off]); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index ce93fcce2732..76ce2f21178b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -977,35 +977,28 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb, } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - enum hns_desc_type type) + int size, int frag_end, enum hns_desc_type type) { struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; + struct device *dev = ring_to_dev(ring); u32 ol_type_vlan_len_msec = 0; u16 bdtp_fe_sc_vld_ra_ri = 0; + struct skb_frag_struct *frag; + unsigned int frag_buf_num; u32 type_cs_vlan_tso = 0; struct sk_buff *skb; u16 inner_vtag = 0; u16 out_vtag = 0; + unsigned int k; + int sizeoflast; u32 paylen = 0; + dma_addr_t dma; u16 mss = 0; u8 ol4_proto; u8 il4_proto; int ret; - /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ - desc_cb->priv = priv; - desc_cb->length = size; - desc_cb->dma = dma; - desc_cb->type = type; - - /* now, fill the descriptor */ - desc->addr = cpu_to_le64(dma); - desc->tx.send_size = cpu_to_le16((u16)size); - hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end); - desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri); - if (type == DESC_TYPE_SKB) { skb = (struct sk_buff *)priv; paylen = skb->len; @@ -1046,38 +1039,47 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->tx.mss = cpu_to_le16(mss); desc->tx.vlan_tag = cpu_to_le16(inner_vtag); desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag); - } - /* move ring pointer to next.*/ - ring_ptr_move_fw(ring, next_to_use); + dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); + } else { + frag = (struct skb_frag_struct *)priv; + dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); + } - return 0; -} + if (dma_mapping_error(ring->dev, dma)) { + ring->stats.sw_err_cnt++; + return -ENOMEM; + } -static int hns3_fill_desc_tso(struct hns3_enet_ring *ring, void *priv, - int size, 
dma_addr_t dma, int frag_end, - enum hns_desc_type type) -{ - unsigned int frag_buf_num; - unsigned int k; - int sizeoflast; - int ret; + desc_cb->length = size; frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE; sizeoflast = size % HNS3_MAX_BD_SIZE; sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE; - /* When the frag size is bigger than hardware, split this frag */ + /* When frag size is bigger than hardware limit, split this frag */ for (k = 0; k < frag_buf_num; k++) { - ret = hns3_fill_desc(ring, priv, - (k == frag_buf_num - 1) ? - sizeoflast : HNS3_MAX_BD_SIZE, - dma + HNS3_MAX_BD_SIZE * k, - frag_end && (k == frag_buf_num - 1) ? 1 : 0, - (type == DESC_TYPE_SKB && !k) ? - DESC_TYPE_SKB : DESC_TYPE_PAGE); - if (ret) - return ret; + /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */ + desc_cb->priv = priv; + desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; + desc_cb->type = (type == DESC_TYPE_SKB && !k) ? + DESC_TYPE_SKB : DESC_TYPE_PAGE; + + /* now, fill the descriptor */ + desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); + desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? + (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); + hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, + frag_end && (k == frag_buf_num - 1) ? + 1 : 0); + desc->tx.bdtp_fe_sc_vld_ra_ri = + cpu_to_le16(bdtp_fe_sc_vld_ra_ri); + + /* move ring pointer to next.*/ + ring_ptr_move_fw(ring, next_to_use); + + desc_cb = &ring->desc_cb[ring->next_to_use]; + desc = &ring->desc[ring->next_to_use]; } return 0; @@ -1133,7 +1135,7 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum, return 0; } -static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig) +static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) { struct device *dev = ring_to_dev(ring); unsigned int i; @@ -1149,12 +1151,14 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig) ring->desc_cb[ring->next_to_use].dma, ring->desc_cb[ring->next_to_use].length, DMA_TO_DEVICE); - else + else if (ring->desc_cb[ring->next_to_use].length) dma_unmap_page(dev, ring->desc_cb[ring->next_to_use].dma, ring->desc_cb[ring->next_to_use].length, DMA_TO_DEVICE); + ring->desc_cb[ring->next_to_use].length = 0; + /* rollback one */ ring_ptr_move_bw(ring, next_to_use); } @@ -1166,12 +1170,10 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) struct hns3_nic_ring_data *ring_data = &tx_ring_data(priv, skb->queue_mapping); struct hns3_enet_ring *ring = ring_data->ring; - struct device *dev = priv->dev; struct netdev_queue *dev_queue; struct skb_frag_struct *frag; int next_to_use_head; int next_to_use_frag; - dma_addr_t dma; int buf_num; int seg_num; int size; @@ -1206,35 +1208,23 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) next_to_use_head = ring->next_to_use; - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) { - netdev_err(netdev, "TX head DMA map failed\n"); - ring->stats.sw_err_cnt++; - goto out_err_tx_ok; - } - - ret = priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, - DESC_TYPE_SKB); + ret = priv->ops.fill_desc(ring, skb, size, seg_num == 1 ? 
1 : 0, + DESC_TYPE_SKB); if (ret) - goto head_dma_map_err; + goto head_fill_err; next_to_use_frag = ring->next_to_use; /* Fill the fragments */ for (i = 1; i < seg_num; i++) { frag = &skb_shinfo(skb)->frags[i - 1]; size = skb_frag_size(frag); - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) { - netdev_err(netdev, "TX frag(%d) DMA map failed\n", i); - ring->stats.sw_err_cnt++; - goto frag_dma_map_err; - } - ret = priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, - seg_num - 1 == i ? 1 : 0, - DESC_TYPE_PAGE); + + ret = priv->ops.fill_desc(ring, frag, size, + seg_num - 1 == i ? 1 : 0, + DESC_TYPE_PAGE); if (ret) - goto frag_dma_map_err; + goto frag_fill_err; } /* Complete translate all packets */ @@ -1247,11 +1237,11 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; -frag_dma_map_err: - hns_nic_dma_unmap(ring, next_to_use_frag); +frag_fill_err: + hns3_clear_desc(ring, next_to_use_frag); -head_dma_map_err: - hns_nic_dma_unmap(ring, next_to_use_head); +head_fill_err: + hns3_clear_desc(ring, next_to_use_head); out_err_tx_ok: dev_kfree_skb_any(skb); @@ -1313,13 +1303,10 @@ static int hns3_nic_set_features(struct net_device *netdev, int ret; if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { - if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { - priv->ops.fill_desc = hns3_fill_desc_tso; + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso; - } else { - priv->ops.fill_desc = hns3_fill_desc; + else priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx; - } } if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) && @@ -1890,7 +1877,7 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring, if (cb->type == DESC_TYPE_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else + else if (cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); } @@ -3247,14 +3234,12 @@ static void hns3_nic_set_priv_ops(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); + priv->ops.fill_desc = hns3_fill_desc; if ((netdev->features & NETIF_F_TSO) || - (netdev->features & NETIF_F_TSO6)) { - priv->ops.fill_desc = hns3_fill_desc_tso; + (netdev->features & NETIF_F_TSO6)) priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso; - } else { - priv->ops.fill_desc = hns3_fill_desc; + else priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx; - } } static int hns3_client_init(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index f25b281ff2aa..71cfca132d0b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -419,8 +419,7 @@ struct hns3_nic_ring_data { struct hns3_nic_ops { int (*fill_desc)(struct hns3_enet_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - enum hns_desc_type type); + int size, int frag_end, enum hns_desc_type type); int (*maybe_stop_tx)(struct sk_buff **out_skb, int *bnum, struct hns3_enet_ring *ring); void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 0f5563f3b779..097b5502603f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -58,6 +58,8 @@ enum hinic_port_cmd { HINIC_PORT_CMD_GET_GLOBAL_QPN = 102, + HINIC_PORT_CMD_SET_TSO = 112, + 
HINIC_PORT_CMD_GET_CAP = 170, }; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c index cb239627770f..967c993d5303 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c @@ -70,8 +70,6 @@ #define SQ_MASKED_IDX(sq, idx) ((idx) & (sq)->wq->mask) #define RQ_MASKED_IDX(rq, idx) ((idx) & (rq)->wq->mask) -#define TX_MAX_MSS_DEFAULT 0x3E00 - enum sq_wqe_type { SQ_NORMAL_WQE = 0, }; @@ -494,33 +492,16 @@ static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx, HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | HINIC_SQ_CTRL_SET(ctrl_size, LEN); - ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT, - QUEUE_INFO_MSS); + ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT, + QUEUE_INFO_MSS) | + HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC); } static void sq_prepare_task(struct hinic_sq_task *task) { - task->pkt_info0 = - HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) | - HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) | - HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN, - INNER_L3TYPE) | - HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE, - VLAN_OFFLOAD) | - HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG); - - task->pkt_info1 = - HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) | - HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) | - HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN); - - task->pkt_info2 = - HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) | - HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN) | - HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN, - TUNNEL_L4TYPE) | - HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN, - OUTER_L3TYPE); + task->pkt_info0 = 0; + task->pkt_info1 = 0; + task->pkt_info2 = 0; task->ufo_v6_identify = 0; @@ -529,6 +510,86 @@ static void sq_prepare_task(struct hinic_sq_task *task) task->zero_pad = 0; } +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN); +} + +void hinic_task_set_outter_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len) +{ + task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) | + HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN); +} + +void hinic_task_set_inner_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE); + task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN); +} + +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task, + enum hinic_l4_offload_type l4_type, + u32 tunnel_len) +{ + task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) | + HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN); +} + +void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, u32 offset) +{ + u32 tcp_udp_cs = 0, sctp = 0; + u32 mss = HINIC_MSS_DEFAULT; + + if (l4_offload == TCP_OFFLOAD_ENABLE || + l4_offload == UDP_OFFLOAD_ENABLE) + tcp_udp_cs = 1; + else if (l4_offload == SCTP_OFFLOAD_ENABLE) + sctp = 1; + + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD); + task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN); + + *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) | + HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) | + HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP); + + *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS); + *queue_info |= HINIC_SQ_CTRL_SET(mss, 
QUEUE_INFO_MSS); +} + +void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, u32 offset, u32 ip_ident, u32 mss) +{ + u32 tso = 0, ufo = 0; + + if (l4_offload == TCP_OFFLOAD_ENABLE) + tso = 1; + else if (l4_offload == UDP_OFFLOAD_ENABLE) + ufo = 1; + + task->ufo_v6_identify = ip_ident; + + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD); + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG); + task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN); + + *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) | + HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) | + HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) | + HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS); + + /* set MSS value */ + *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS); + *queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS); +} + /** * hinic_sq_prepare_wqe - prepare wqe before insert to the queue * @sq: send queue @@ -613,6 +674,16 @@ struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq, } /** + * hinic_sq_return_wqe - return the wqe to the sq + * @sq: send queue + * @wqe_size: the size of the wqe + **/ +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size) +{ + hinic_return_wqe(sq->wq, wqe_size); +} + +/** * hinic_sq_write_wqe - write the wqe to the sq * @sq: send queue * @prod_idx: pi of the wqe diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h index 6c84f83ec283..a0dc63a4bfc7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -149,6 +149,31 @@ int hinic_get_sq_free_wqebbs(struct hinic_sq *sq); int hinic_get_rq_free_wqebbs(struct hinic_rq *rq); +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len); + +void hinic_task_set_outter_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len); + +void hinic_task_set_inner_l3(struct hinic_sq_task *task, + enum hinic_l3_offload_type l3_type, + u32 network_len); + +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task, + enum hinic_l4_offload_type l4_type, + u32 tunnel_len); + +void hinic_set_cs_inner_l4(struct hinic_sq_task *task, + u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, u32 offset); + +void hinic_set_tso_inner_l4(struct hinic_sq_task *task, + u32 *queue_info, + enum hinic_l4_offload_type l4_offload, + u32 l4_len, + u32 offset, u32 ip_ident, u32 mss); + void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx, struct hinic_sq_wqe *wqe, struct hinic_sge *sges, int nr_sges); @@ -159,6 +184,8 @@ void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size, struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq, unsigned int wqe_size, u16 *prod_idx); +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size); + void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx, struct hinic_sq_wqe *wqe, struct sk_buff *skb, unsigned int wqe_size); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c index 3e3181c089bd..f92f1bf3901a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c @@ -775,6 +775,20 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size, } /** + * hinic_return_wqe - return the wqe when transmit failed + * @wq: wq to return wqe + * @wqe_size: wqe size + **/ 
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size) +{ + int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size; + + atomic_sub(num_wqebbs, &wq->prod_idx); + + atomic_add(num_wqebbs, &wq->delta); +} + +/** * hinic_put_wqe - return the wqe place to use for a new wqe * @wq: wq to return wqe * @wqe_size: wqe size diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h index 9c030a0f035e..9b66545ba563 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h @@ -104,6 +104,8 @@ void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq); struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size, u16 *prod_idx); +void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size); + void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size); struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h index bc73485483c5..9754d6ed5f4a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h @@ -62,19 +62,33 @@ (((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \ & HINIC_CMDQ_WQE_HEADER_##member##_MASK) -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0 -#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16 -#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22 -#define HINIC_SQ_CTRL_LEN_SHIFT 29 - -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF -#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F -#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1 -#define HINIC_SQ_CTRL_LEN_MASK 0x3 - -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13 - -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0 +#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16 +#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22 +#define HINIC_SQ_CTRL_LEN_SHIFT 29 + +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF +#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F +#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1 +#define HINIC_SQ_CTRL_LEN_MASK 0x3 + +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT 2 +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT 10 +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT 11 +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT 12 +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13 +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT 27 +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT 28 +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT 29 + +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK 0xFF +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK 0x1 +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK 0x7 #define HINIC_SQ_CTRL_SET(val, member) \ (((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \ @@ -84,6 +98,10 @@ (((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \ & HINIC_SQ_CTRL_##member##_MASK) +#define HINIC_SQ_CTRL_CLEAR(val, member) \ + ((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \ + << HINIC_SQ_CTRL_##member##_SHIFT))) + #define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT 0 #define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT 8 #define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT 10 @@ -108,28 +126,28 @@ /* 8 bits reserved */ #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT 8 -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT 16 
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT 24 +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT 16 +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT 24 /* 8 bits reserved */ #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK 0xFF -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK 0xFF -#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK 0xFF #define HINIC_SQ_TASK_INFO1_SET(val, member) \ (((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) << \ HINIC_SQ_TASK_INFO1_##member##_SHIFT) -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0 -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT 12 -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19 +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT 0 +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT 8 +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16 /* 1 bit reserved */ -#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 22 +#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 24 /* 8 bits reserved */ -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK 0xFFF -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK 0x7F -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x3 +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK 0xFF +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x7 /* 1 bit reserved */ #define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK 0x3 /* 8 bits reserved */ @@ -187,12 +205,15 @@ sizeof(struct hinic_sq_task) + \ (nr_sges) * sizeof(struct hinic_sq_bufdesc)) -#define HINIC_SCMD_DATA_LEN 16 +#define HINIC_SCMD_DATA_LEN 16 + +#define HINIC_MAX_SQ_BUFDESCS 17 -#define HINIC_MAX_SQ_BUFDESCS 17 +#define HINIC_SQ_WQE_MAX_SIZE 320 +#define HINIC_RQ_WQE_SIZE 32 -#define HINIC_SQ_WQE_MAX_SIZE 320 -#define HINIC_RQ_WQE_SIZE 32 +#define HINIC_MSS_DEFAULT 0x3E00 +#define HINIC_MSS_MIN 0x50 enum hinic_l4offload_type { HINIC_L4_OFF_DISABLE = 0, @@ -211,6 +232,26 @@ enum hinic_pkt_parsed { HINIC_PKT_PARSED = 1, }; +enum hinic_l3_offload_type { + L3TYPE_UNKNOWN = 0, + IPV6_PKT = 1, + IPV4_PKT_NO_CHKSUM_OFFLOAD = 2, + IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3, +}; + +enum hinic_l4_offload_type { + OFFLOAD_DISABLE = 0, + TCP_OFFLOAD_ENABLE = 1, + SCTP_OFFLOAD_ENABLE = 2, + UDP_OFFLOAD_ENABLE = 3, +}; + +enum hinic_l4_tunnel_type { + NOT_TUNNEL, + TUNNEL_UDP_NO_CSUM, + TUNNEL_UDP_CSUM, +}; + enum hinic_outer_l3type { HINIC_OUTER_L3TYPE_UNKNOWN = 0, HINIC_OUTER_L3TYPE_IPV6 = 1, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 4a8f82938ed5..fdf2bdb6b0d0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -805,7 +805,8 @@ static const struct net_device_ops hinic_netdev_ops = { static void netdev_features_init(struct net_device *netdev) { - netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA; + netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; netdev->vlan_features = netdev->hw_features; @@ -863,6 +864,20 @@ static void link_status_event_handler(void *handle, void *buf_in, u16 in_size, *out_size = sizeof(*ret_link_status); } +static int set_features(struct hinic_dev *nic_dev, + netdev_features_t pre_features, + netdev_features_t features, bool force_change) +{ + netdev_features_t changed = force_change ? ~0 : pre_features ^ features; + int err = 0; + + if (changed & NETIF_F_TSO) + err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ? 
+ HINIC_TSO_ENABLE : HINIC_TSO_DISABLE); + + return err; +} + /** * nic_dev_init - Initialize the NIC device * @pdev: the NIC pci device @@ -963,7 +978,12 @@ static int nic_dev_init(struct pci_dev *pdev) hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS, nic_dev, link_status_event_handler); + err = set_features(nic_dev, 0, nic_dev->netdev->features, true); + if (err) + goto err_set_features; + SET_NETDEV_DEV(netdev, &pdev->dev); + err = register_netdev(netdev); if (err) { dev_err(&pdev->dev, "Failed to register netdev\n"); @@ -973,6 +993,7 @@ static int nic_dev_init(struct pci_dev *pdev) return 0; err_reg_netdev: +err_set_features: hinic_hwdev_cb_unregister(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS); cancel_work_sync(&rx_mode_work->work); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 4d4e3f05fb5f..7575a7d3bd9f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -377,3 +377,35 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev, return 0; } + +/** + * hinic_port_set_tso - set port tso configuration + * @nic_dev: nic device + * @state: the tso state to set + * + * Return 0 - Success, negative - Failure + **/ +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_tso_config tso_cfg = {0}; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + tso_cfg.tso_en = state; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO, + &tso_cfg, sizeof(tso_cfg), + &tso_cfg, &out_size); + if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) { + dev_err(&pdev->dev, + "Failed to set port tso, ret = %d\n", + tso_cfg.status); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h index 9404365195dd..f6e3220fe28f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h @@ -72,6 +72,11 @@ enum hinic_speed { HINIC_SPEED_UNKNOWN = 0xFF, }; +enum hinic_tso_state { + HINIC_TSO_DISABLE = 0, + HINIC_TSO_ENABLE = 1, +}; + struct hinic_port_mac_cmd { u8 status; u8 version; @@ -167,6 +172,17 @@ struct hinic_port_cap { u8 rsvd2[3]; }; +struct hinic_tso_config { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1; + u8 tso_en; + u8 resv2[3]; +}; + int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr, u16 vlan_id); @@ -195,4 +211,6 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev, int hinic_port_get_cap(struct hinic_dev *nic_dev, struct hinic_port_cap *port_cap); +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state); + #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index c5fca0356c9c..11e73e67358d 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -26,6 +26,13 @@ #include <linux/skbuff.h> #include <linux/smp.h> #include <asm/byteorder.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/sctp.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/checksum.h> +#include <net/ip6_checksum.h> #include "hinic_common.h" #include "hinic_hw_if.h" @@ -45,9 +52,31 @@ #define CI_UPDATE_NO_PENDING 0 #define CI_UPDATE_NO_COALESC 
0 -#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr)) +#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr)) -#define MIN_SKB_LEN 64 +#define MIN_SKB_LEN 17 + +#define MAX_PAYLOAD_OFFSET 221 +#define TRANSPORT_OFFSET(l4_hdr, skb) ((u32)((l4_hdr) - (skb)->data)) + +union hinic_l3 { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; +}; + +union hinic_l4 { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; +}; + +enum hinic_offload_type { + TX_OFFLOAD_TSO = BIT(0), + TX_OFFLOAD_CSUM = BIT(1), + TX_OFFLOAD_VLAN = BIT(2), + TX_OFFLOAD_INVALID = BIT(3), +}; /** * hinic_txq_clean_stats - Clean the statistics of specific queue @@ -175,18 +204,263 @@ static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb, DMA_TO_DEVICE); } +static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip, + union hinic_l4 *l4, + enum hinic_offload_type offload_type, + enum hinic_l3_offload_type *l3_type, + u8 *l4_proto) +{ + u8 *exthdr; + + if (ip->v4->version == 4) { + *l3_type = (offload_type == TX_OFFLOAD_CSUM) ? + IPV4_PKT_NO_CHKSUM_OFFLOAD : + IPV4_PKT_WITH_CHKSUM_OFFLOAD; + *l4_proto = ip->v4->protocol; + } else if (ip->v4->version == 6) { + *l3_type = IPV6_PKT; + exthdr = ip->hdr + sizeof(*ip->v6); + *l4_proto = ip->v6->nexthdr; + if (exthdr != l4->hdr) { + int start = exthdr - skb->data; + __be16 frag_off; + + ipv6_skip_exthdr(skb, start, l4_proto, &frag_off); + } + } else { + *l3_type = L3TYPE_UNKNOWN; + *l4_proto = 0; + } +} + +static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4, + enum hinic_offload_type offload_type, u8 l4_proto, + enum hinic_l4_offload_type *l4_offload, + u32 *l4_len, u32 *offset) +{ + *l4_offload = OFFLOAD_DISABLE; + *offset = 0; + *l4_len = 0; + + switch (l4_proto) { + case IPPROTO_TCP: + *l4_offload = TCP_OFFLOAD_ENABLE; + /* doff in unit of 4B */ + *l4_len = l4->tcp->doff * 4; + *offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb); + break; + + case IPPROTO_UDP: + *l4_offload = UDP_OFFLOAD_ENABLE; + *l4_len = sizeof(struct udphdr); + *offset = TRANSPORT_OFFSET(l4->hdr, skb); + break; + + case IPPROTO_SCTP: + /* only csum offload support sctp */ + if (offload_type != TX_OFFLOAD_CSUM) + break; + + *l4_offload = SCTP_OFFLOAD_ENABLE; + *l4_len = sizeof(struct sctphdr); + *offset = TRANSPORT_OFFSET(l4->hdr, skb); + break; + + default: + break; + } +} + +static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto) +{ + return (ip->v4->version == 4) ? 
+ csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) : + csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0); +} + +static int offload_tso(struct hinic_sq_task *task, u32 *queue_info, + struct sk_buff *skb) +{ + u32 offset, l4_len, ip_identify, network_hdr_len; + enum hinic_l3_offload_type l3_offload; + enum hinic_l4_offload_type l4_offload; + union hinic_l3 ip; + union hinic_l4 l4; + u8 l4_proto; + + if (!skb_is_gso(skb)) + return 0; + + if (skb_cow_head(skb, 0) < 0) + return -EPROTONOSUPPORT; + + if (skb->encapsulation) { + u32 gso_type = skb_shinfo(skb)->gso_type; + u32 tunnel_type = 0; + u32 l4_tunnel_len; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_inner_network_header_len(skb); + + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD; + } else if (ip.v4->version == 6) { + l3_offload = IPV6_PKT; + } else { + l3_offload = 0; + } + + hinic_task_set_outter_l3(task, l3_offload, + skb_network_header_len(skb)); + + if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP); + tunnel_type = TUNNEL_UDP_CSUM; + } else if (gso_type & SKB_GSO_UDP_TUNNEL) { + tunnel_type = TUNNEL_UDP_NO_CSUM; + } + + l4_tunnel_len = skb_inner_network_offset(skb) - + skb_transport_offset(skb); + hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len); + + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + } else { + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_network_header_len(skb); + } + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) + ip.v4->tot_len = 0; + else + ip.v6->payload_len = 0; + + get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload, + &l4_proto); + + hinic_task_set_inner_l3(task, l3_offload, network_hdr_len); + + ip_identify = 0; + if (l4_proto == IPPROTO_TCP) + l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP); + + get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload, + &l4_len, &offset); + + hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset, + ip_identify, skb_shinfo(skb)->gso_size); + + return 1; +} + +static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, + struct sk_buff *skb) +{ + enum hinic_l4_offload_type l4_offload; + u32 offset, l4_len, network_hdr_len; + enum hinic_l3_offload_type l3_type; + union hinic_l3 ip; + union hinic_l4 l4; + u8 l4_proto; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->encapsulation) { + u32 l4_tunnel_len; + + ip.hdr = skb_network_header(skb); + + if (ip.v4->version == 4) + l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD; + else if (ip.v4->version == 6) + l3_type = IPV6_PKT; + else + l3_type = L3TYPE_UNKNOWN; + + hinic_task_set_outter_l3(task, l3_type, + skb_network_header_len(skb)); + + l4_tunnel_len = skb_inner_network_offset(skb) - + skb_transport_offset(skb); + + hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM, + l4_tunnel_len); + + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + network_hdr_len = skb_inner_network_header_len(skb); + } else { + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + network_hdr_len = skb_network_header_len(skb); + } + + get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type, + &l4_proto); + + hinic_task_set_inner_l3(task, l3_type, network_hdr_len); + + get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload, + &l4_len, &offset); + + 
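/* l4 offload type/len go into the task section; payload offset and csum flags go into queue_info */ +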
hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset); + + return 1; +} + +static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, + u32 *queue_info) +{ + enum hinic_offload_type offload = 0; + int enabled; + + enabled = offload_tso(task, queue_info, skb); + if (enabled > 0) { + offload |= TX_OFFLOAD_TSO; + } else if (enabled == 0) { + enabled = offload_csum(task, queue_info, skb); + if (enabled) + offload |= TX_OFFLOAD_CSUM; + } else { + return -EPROTONOSUPPORT; + } + + if (offload) + hinic_task_set_l2hdr(task, skb_network_offset(skb)); + + /* payload offset should not be more than 221 */ + if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) > + MAX_PAYLOAD_OFFSET) { + return -EPROTONOSUPPORT; + } + + /* mss should not be less than 80 */ + if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) { + *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS); + *queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS); + } + + return 0; +} + netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct hinic_dev *nic_dev = netdev_priv(netdev); + u16 prod_idx, q_id = skb->queue_mapping; struct netdev_queue *netdev_txq; int nr_sges, err = NETDEV_TX_OK; struct hinic_sq_wqe *sq_wqe; unsigned int wqe_size; struct hinic_txq *txq; struct hinic_qp *qp; - u16 prod_idx; - txq = &nic_dev->txqs[skb->queue_mapping]; + txq = &nic_dev->txqs[q_id]; qp = container_of(txq->sq, struct hinic_qp, sq); if (skb->len < MIN_SKB_LEN) { @@ -236,15 +510,23 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) process_sq_wqe: hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges); + err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info); + if (err) + goto offload_error; + hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size); flush_skbs: - netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping); + netdev_txq = netdev_get_tx_queue(netdev, q_id); if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq))) hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0); return err; +offload_error: + hinic_sq_return_wqe(txq->sq, wqe_size); + tx_unmap_skb(nic_dev, skb, txq->sges); + skb_error: dev_kfree_skb_any(skb); @@ -252,7 +534,8 @@ update_error_stats: u64_stats_update_begin(&txq->txq_stats.syncp); txq->txq_stats.tx_dropped++; u64_stats_update_end(&txq->txq_stats.syncp); - return err; + + return NETDEV_TX_OK; } /** diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index b542aba6f0e8..fd3373d82a9e 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -68,6 +68,9 @@ config E1000E <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/e1000e.rst>. + To compile this driver as a module, choose M here. The module will be called e1000e. @@ -94,7 +97,7 @@ config IGB <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/e1000.rst>. + <file:Documentation/networking/igb.rst>. To compile this driver as a module, choose M here. The module will be called igb. @@ -130,7 +133,7 @@ config IGBVF <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/e1000.rst>. + <file:Documentation/networking/igbvf.rst>. To compile this driver as a module, choose M here. The module will be called igbvf.
@@ -147,7 +150,7 @@ config IXGB <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/ixgb.txt>. + <file:Documentation/networking/ixgb.rst>. To compile this driver as a module, choose M here. The module will be called ixgb. @@ -164,6 +167,9 @@ config IXGBE <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/ixgbe.rst>. + To compile this driver as a module, choose M here. The module will be called ixgbe. @@ -205,7 +211,7 @@ config IXGBEVF <http://support.intel.com> More specific information on configuring the driver is in - <file:Documentation/networking/ixgbevf.txt>. + <file:Documentation/networking/ixgbevf.rst>. To compile this driver as a module, choose M here. The module will be called ixgbevf. MSI-X interrupt support is required @@ -222,6 +228,9 @@ config I40E <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/i40e.rst>. + To compile this driver as a module, choose M here. The module will be called i40e. @@ -254,6 +263,9 @@ config I40EVF This driver was formerly named i40evf. + More specific information on configuring the driver is in + <file:Documentation/networking/iavf.rst>. + To compile this driver as a module, choose M here. The module will be called iavf. MSI-X interrupt support is required for this driver to work correctly. @@ -269,6 +281,9 @@ config ICE <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/ice.rst>. + To compile this driver as a module, choose M here. The module will be called ice. @@ -284,7 +299,26 @@ config FM10K <http://support.intel.com> + More specific information on configuring the driver is in + <file:Documentation/networking/fm10k.rst>. + To compile this driver as a module, choose M here. The module will be called fm10k. MSI-X interrupt support is required +config IGC + tristate "Intel(R) Ethernet Controller I225-LM/I225-V support" + default n + depends on PCI + ---help--- + This driver supports Intel(R) Ethernet Controller I225-LM/I225-V + family of adapters. + + For more information on how to identify your adapter, go + to the Adapter & Driver ID Guide that can be located at: + + <http://support.intel.com> + + To compile this driver as a module, choose M here. The module + will be called igc. 
+ endif # NET_VENDOR_INTEL diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index b91153df6ee8..3075290063f6 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_E100) += e100.o obj-$(CONFIG_E1000) += e1000/ obj-$(CONFIG_E1000E) += e1000e/ obj-$(CONFIG_IGB) += igb/ +obj-$(CONFIG_IGC) += igc/ obj-$(CONFIG_IGBVF) += igbvf/ obj-$(CONFIG_IXGBE) += ixgbe/ obj-$(CONFIG_IXGBEVF) += ixgbevf/ diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile new file mode 100644 index 000000000000..4387f6ba8e67 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Intel Corporation + +# +# Intel(R) I225-LM/I225-V 2.5G Ethernet Controller +# + +obj-$(CONFIG_IGC) += igc.o + +igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h new file mode 100644 index 000000000000..cdf18a5d9e08 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -0,0 +1,443 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_H_ +#define _IGC_H_ + +#include <linux/kobject.h> + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/vmalloc.h> + +#include <linux/ethtool.h> + +#include <linux/sctp.h> + +#define IGC_ERR(args...) pr_err("igc: " args) + +#define PFX "igc: " + +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> + +#include "igc_hw.h" + +/* main */ +extern char igc_driver_name[]; +extern char igc_driver_version[]; + +/* Interrupt defines */ +#define IGC_START_ITR 648 /* ~6000 ints/sec */ +#define IGC_FLAG_HAS_MSI BIT(0) +#define IGC_FLAG_QUEUE_PAIRS BIT(4) +#define IGC_FLAG_NEED_LINK_UPDATE BIT(9) +#define IGC_FLAG_MEDIA_RESET BIT(10) +#define IGC_FLAG_MAS_ENABLE BIT(12) +#define IGC_FLAG_HAS_MSIX BIT(13) +#define IGC_FLAG_VLAN_PROMISC BIT(15) + +#define IGC_START_ITR 648 /* ~6000 ints/sec */ +#define IGC_4K_ITR 980 +#define IGC_20K_ITR 196 +#define IGC_70K_ITR 56 + +#define IGC_DEFAULT_ITR 3 /* dynamic */ +#define IGC_MAX_ITR_USECS 10000 +#define IGC_MIN_ITR_USECS 10 +#define NON_Q_VECTORS 1 +#define MAX_MSIX_ENTRIES 10 + +/* TX/RX descriptor defines */ +#define IGC_DEFAULT_TXD 256 +#define IGC_DEFAULT_TX_WORK 128 +#define IGC_MIN_TXD 80 +#define IGC_MAX_TXD 4096 + +#define IGC_DEFAULT_RXD 256 +#define IGC_MIN_RXD 80 +#define IGC_MAX_RXD 4096 + +/* Transmit and receive queues */ +#define IGC_MAX_RX_QUEUES 4 +#define IGC_MAX_TX_QUEUES 4 + +#define MAX_Q_VECTORS 8 +#define MAX_STD_JUMBO_FRAME_SIZE 9216 + +/* Supported Rx Buffer Sizes */ +#define IGC_RXBUFFER_256 256 +#define IGC_RXBUFFER_2048 2048 +#define IGC_RXBUFFER_3072 3072 + +#define IGC_RX_HDR_LEN IGC_RXBUFFER_256 + +/* RX and TX descriptor control thresholds. + * PTHRESH - MAC will consider prefetch if it has fewer than this number of + * descriptors available in its onboard memory. + * Setting this to 0 disables RX descriptor prefetch. + * HTHRESH - MAC will only prefetch if there are at least this many descriptors + * available in host memory. + * If PTHRESH is 0, this should also be 0. + * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back + * descriptors until either it has this many to write back, or the + * ITR timer expires. 
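+ *
+ * For example, with the Rx values below the MAC batches descriptor
+ * writebacks four at a time (IGC_RX_WTHRESH = 4) unless the ITR
+ * timer fires first.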
+ */ +#define IGC_RX_PTHRESH 8 +#define IGC_RX_HTHRESH 8 +#define IGC_TX_PTHRESH 8 +#define IGC_TX_HTHRESH 1 +#define IGC_RX_WTHRESH 4 +#define IGC_TX_WTHRESH 16 + +#define IGC_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +#define IGC_TS_HDR_LEN 16 + +#define IGC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) + +#if (PAGE_SIZE < 8192) +#define IGC_MAX_FRAME_BUILD_SKB \ + (SKB_WITH_OVERHEAD(IGC_RXBUFFER_2048) - IGC_SKB_PAD - IGC_TS_HDR_LEN) +#else +#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN) +#endif + +/* How many Rx Buffers do we bundle into one write to the hardware? */ +#define IGC_RX_BUFFER_WRITE 16 /* Must be power of 2 */ + +/* igc_test_staterr - tests bits within Rx descriptor status and error fields */ +static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc, + const u32 stat_err_bits) +{ + return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); +} + +enum igc_state_t { + __IGC_TESTING, + __IGC_RESETTING, + __IGC_DOWN, + __IGC_PTP_TX_IN_PROGRESS, +}; + +enum igc_tx_flags { + /* cmd_type flags */ + IGC_TX_FLAGS_VLAN = 0x01, + IGC_TX_FLAGS_TSO = 0x02, + IGC_TX_FLAGS_TSTAMP = 0x04, + + /* olinfo flags */ + IGC_TX_FLAGS_IPV4 = 0x10, + IGC_TX_FLAGS_CSUM = 0x20, +}; + +enum igc_boards { + board_base, +}; + +/* The largest size we can write to the descriptor is 65535. In order to + * maintain a power of two alignment we have to limit ourselves to 32K. + */ +#define IGC_MAX_TXD_PWR 15 +#define IGC_MAX_DATA_PER_TXD BIT(IGC_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD) +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer + */ +struct igc_tx_buffer { + union igc_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; + unsigned int bytecount; + u16 gso_segs; + __be16 protocol; + + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct igc_rx_buffer { + dma_addr_t dma; + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 page_offset; +#else + __u16 page_offset; +#endif + __u16 pagecnt_bias; +}; + +struct igc_tx_queue_stats { + u64 packets; + u64 bytes; + u64 restart_queue; + u64 restart_queue2; +}; + +struct igc_rx_queue_stats { + u64 packets; + u64 bytes; + u64 drops; + u64 csum_err; + u64 alloc_failed; +}; + +struct igc_rx_packet_stats { + u64 ipv4_packets; /* IPv4 headers processed */ + u64 ipv4e_packets; /* IPv4E headers with extensions processed */ + u64 ipv6_packets; /* IPv6 headers processed */ + u64 ipv6e_packets; /* IPv6E headers with extensions processed */ + u64 tcp_packets; /* TCP headers processed */ + u64 udp_packets; /* UDP headers processed */ + u64 sctp_packets; /* SCTP headers processed */ + u64 nfs_packets; /* NFS headers processed */ + u64 other_packets; +}; + +struct igc_ring_container { + struct igc_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +struct igc_ring { + struct igc_q_vector *q_vector; /* backlink to q_vector */ + struct net_device *netdev; /* back pointer to net_device */ + struct device *dev; /* device for dma mapping */ + union { /* array of buffer info structs
*/ + struct igc_tx_buffer *tx_buffer_info; + struct igc_rx_buffer *rx_buffer_info; + }; + void *desc; /* descriptor ring memory */ + unsigned long flags; /* ring specific flags */ + void __iomem *tail; /* pointer to ring tail register */ + dma_addr_t dma; /* phys address of the ring */ + unsigned int size; /* length of desc. ring in bytes */ + + u16 count; /* number of desc. in the ring */ + u8 queue_index; /* logical index of the ring*/ + u8 reg_idx; /* physical index of the ring */ + + /* everything past this point are written often */ + u16 next_to_clean; + u16 next_to_use; + u16 next_to_alloc; + + union { + /* TX */ + struct { + struct igc_tx_queue_stats tx_stats; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync tx_syncp2; + }; + /* RX */ + struct { + struct igc_rx_queue_stats rx_stats; + struct igc_rx_packet_stats pkt_stats; + struct u64_stats_sync rx_syncp; + struct sk_buff *skb; + }; + }; +} ____cacheline_internodealigned_in_smp; + +struct igc_q_vector { + struct igc_adapter *adapter; /* backlink */ + void __iomem *itr_register; + u32 eims_value; /* EIMS mask value */ + + u16 itr_val; + u8 set_itr; + + struct igc_ring_container rx, tx; + + struct napi_struct napi; + + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[IFNAMSIZ + 9]; + struct net_device poll_dev; + + /* for dynamic allocation of rings associated with this q_vector */ + struct igc_ring ring[0] ____cacheline_internodealigned_in_smp; +}; + +struct igc_mac_addr { + u8 addr[ETH_ALEN]; + u8 queue; + u8 state; /* bitmask */ +}; + +#define IGC_MAC_STATE_DEFAULT 0x1 +#define IGC_MAC_STATE_MODIFIED 0x2 +#define IGC_MAC_STATE_IN_USE 0x4 + +/* Board specific private data structure */ +struct igc_adapter { + struct net_device *netdev; + + unsigned long state; + unsigned int flags; + unsigned int num_q_vectors; + + struct msix_entry *msix_entries; + + /* TX */ + u16 tx_work_limit; + u32 tx_timeout_count; + int num_tx_queues; + struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES]; + + /* RX */ + int num_rx_queues; + struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES]; + + struct timer_list watchdog_timer; + struct timer_list dma_err_timer; + struct timer_list phy_info_timer; + + u16 link_speed; + u16 link_duplex; + + u8 port_num; + + u8 __iomem *io_addr; + /* Interrupt Throttle Rate */ + u32 rx_itr_setting; + u32 tx_itr_setting; + + struct work_struct reset_task; + struct work_struct watchdog_task; + struct work_struct dma_err_task; + bool fc_autoneg; + + u8 tx_timeout_factor; + + int msg_enable; + u32 max_frame_size; + u32 min_frame_size; + + /* OS defined structs */ + struct pci_dev *pdev; + /* lock for statistics */ + spinlock_t stats64_lock; + struct rtnl_link_stats64 stats64; + + /* structs defined in igc_hw.h */ + struct igc_hw hw; + struct igc_hw_stats stats; + + struct igc_q_vector *q_vector[MAX_Q_VECTORS]; + u32 eims_enable_mask; + u32 eims_other; + + u16 tx_ring_count; + u16 rx_ring_count; + + u32 *shadow_vfta; + + u32 rss_queues; + + /* lock for RX network flow classification filter */ + spinlock_t nfc_lock; + + struct igc_mac_addr *mac_table; + + unsigned long link_check_timeout; + struct igc_info ei; +}; + +/* igc_desc_unused - calculate if we have unused descriptors */ +static inline u16 igc_desc_unused(const struct igc_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 
0 : ring->count) + ntc - ntu - 1; +} + +static inline s32 igc_get_phy_info(struct igc_hw *hw) +{ + if (hw->phy.ops.get_phy_info) + return hw->phy.ops.get_phy_info(hw); + + return 0; +} + +static inline s32 igc_reset_phy(struct igc_hw *hw) +{ + if (hw->phy.ops.reset) + return hw->phy.ops.reset(hw); + + return 0; +} + +static inline struct netdev_queue *txring_txq(const struct igc_ring *tx_ring) +{ + return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); +} + +enum igc_ring_flags_t { + IGC_RING_FLAG_RX_3K_BUFFER, + IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, + IGC_RING_FLAG_RX_SCTP_CSUM, + IGC_RING_FLAG_RX_LB_VLAN_BSWAP, + IGC_RING_FLAG_TX_CTX_IDX, + IGC_RING_FLAG_TX_DETECT_HANG +}; + +#define ring_uses_large_buffer(ring) \ + test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) + +#define ring_uses_build_skb(ring) \ + test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) + +static inline unsigned int igc_rx_bufsz(struct igc_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return IGC_RXBUFFER_3072; + + if (ring_uses_build_skb(ring)) + return IGC_MAX_FRAME_BUILD_SKB + IGC_TS_HDR_LEN; +#endif + return IGC_RXBUFFER_2048; +} + +static inline unsigned int igc_rx_pg_order(struct igc_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return 1; +#endif + return 0; +} + +static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) +{ + if (hw->phy.ops.read_reg) + return hw->phy.ops.read_reg(hw, offset, data); + + return 0; +} + +#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) + +#define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS) + +#define IGC_RX_DESC(R, i) \ + (&(((union igc_adv_rx_desc *)((R)->desc))[i])) +#define IGC_TX_DESC(R, i) \ + (&(((union igc_adv_tx_desc *)((R)->desc))[i])) +#define IGC_TX_CTXTDESC(R, i) \ + (&(((struct igc_adv_tx_context_desc *)((R)->desc))[i])) + +#endif /* _IGC_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c new file mode 100644 index 000000000000..832da609d9a7 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include <linux/delay.h> + +#include "igc_hw.h" +#include "igc_i225.h" +#include "igc_mac.h" +#include "igc_base.h" +#include "igc.h" + +/** + * igc_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + */ +static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw) +{ + u32 gcr = rd32(IGC_GCR); + u16 pcie_devctl2; + s32 ret_val = 0; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & IGC_GCR_CMPL_TMOUT_MASK) + goto out; + + /* if capabilities version is type 1 we can write the + * timeout of 10ms to 200ms through the GCR register + */ + if (!(gcr & IGC_GCR_CAP_VER2)) { + gcr |= IGC_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); + if (ret_val) + goto out; + + pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; + + ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND; + + wr32(IGC_GCR, gcr); + + return ret_val; +} + +/** + * igc_check_for_link_base - Check for link + * @hw: pointer to the HW structure + * + 
* If SGMII is enabled, then use the PCS register to determine link; otherwise + * use the generic interface for determining link. + */ +static s32 igc_check_for_link_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + + ret_val = igc_check_for_copper_link(hw); + + return ret_val; +} + +/** + * igc_reset_hw_base - Reset hardware + * @hw: pointer to the HW structure + * + * This resets the hardware into a known state. This is a + * function pointer entry point called by the api module. + */ +static s32 igc_reset_hw_base(struct igc_hw *hw) +{ + s32 ret_val; + u32 ctrl; + + /* Prevent the PCI-E bus from sticking if there is no TLP connection + * on the last TLP read/write transaction when MAC is reset. + */ + ret_val = igc_disable_pcie_master(hw); + if (ret_val) + hw_dbg("PCI-E Master disable polling has failed.\n"); + + /* set the completion timeout for interface */ + ret_val = igc_set_pcie_completion_timeout(hw); + if (ret_val) + hw_dbg("PCI-E Set completion timeout has failed.\n"); + + hw_dbg("Masking off all interrupts\n"); + wr32(IGC_IMC, 0xffffffff); + + wr32(IGC_RCTL, 0); + wr32(IGC_TCTL, IGC_TCTL_PSP); + wrfl(); + + usleep_range(10000, 20000); + + ctrl = rd32(IGC_CTRL); + + hw_dbg("Issuing a global reset to MAC\n"); + wr32(IGC_CTRL, ctrl | IGC_CTRL_RST); + + ret_val = igc_get_auto_rd_done(hw); + if (ret_val) { + /* When auto config read does not complete, do not + * return with an error. This can happen in situations + * where there is no eeprom and prevents getting link. + */ + hw_dbg("Auto Read Done did not complete\n"); + } + + /* Clear any pending interrupt events. */ + wr32(IGC_IMC, 0xffffffff); + rd32(IGC_ICR); + + return ret_val; +} + +/** + * igc_get_phy_id_base - Retrieve PHY addr and id + * @hw: pointer to the HW structure + * + * Retrieves the PHY address and ID for both PHYs, whether or not they use + * the SGMII interface. + */ +static s32 igc_get_phy_id_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + + ret_val = igc_get_phy_id(hw); + + return ret_val; +} + +/** + * igc_init_nvm_params_base - Init NVM func ptrs. + * @hw: pointer to the HW structure + */ +static s32 igc_init_nvm_params_base(struct igc_hw *hw) +{ + struct igc_nvm_info *nvm = &hw->nvm; + u32 eecd = rd32(IGC_EECD); + u16 size; + + size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >> + IGC_EECD_SIZE_EX_SHIFT); + + /* Added to a constant, "size" becomes the left-shift value + * for setting word_size. + */ + size += NVM_WORD_SIZE_BASE_SHIFT; + + /* Just in case size is out of range, cap it to the largest + * EEPROM size supported + */ + if (size > 15) + size = 15; + + nvm->word_size = BIT(size); + nvm->opcode_bits = 8; + nvm->delay_usec = 1; + + nvm->page_size = eecd & IGC_EECD_ADDR_BITS ? 32 : 8; + nvm->address_bits = eecd & IGC_EECD_ADDR_BITS ? + 16 : 8; + + if (nvm->word_size == BIT(15)) + nvm->page_size = 128; + + return 0; +} + +/** + * igc_setup_copper_link_base - Configure copper link settings + * @hw: pointer to the HW structure + * + * Configures the link for auto-neg or forced speed and duplex. Then we check + * for link; once link is established, collision distance and flow control + * are configured. + */ +static s32 igc_setup_copper_link_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + u32 ctrl; + + ctrl = rd32(IGC_CTRL); + ctrl |= IGC_CTRL_SLU; + ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX); + wr32(IGC_CTRL, ctrl); + + ret_val = igc_setup_copper_link(hw); + + return ret_val; +} + +/** + * igc_init_mac_params_base - Init MAC func ptrs.
+ * @hw: pointer to the HW structure + */ +static s32 igc_init_mac_params_base(struct igc_hw *hw) +{ + struct igc_dev_spec_base *dev_spec = &hw->dev_spec._base; + struct igc_mac_info *mac = &hw->mac; + + /* Set mta register count */ + mac->mta_reg_count = 128; + mac->rar_entry_count = IGC_RAR_ENTRIES; + + /* reset */ + mac->ops.reset_hw = igc_reset_hw_base; + + mac->ops.acquire_swfw_sync = igc_acquire_swfw_sync_i225; + mac->ops.release_swfw_sync = igc_release_swfw_sync_i225; + + /* Allow a single clear of the SW semaphore on I225 */ + if (mac->type == igc_i225) + dev_spec->clear_semaphore_once = true; + + /* physical interface link setup */ + mac->ops.setup_physical_interface = igc_setup_copper_link_base; + + return 0; +} + +/** + * igc_init_phy_params_base - Init PHY func ptrs. + * @hw: pointer to the HW structure + */ +static s32 igc_init_phy_params_base(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u32 ctrl_ext; + + if (hw->phy.media_type != igc_media_type_copper) { + phy->type = igc_phy_none; + goto out; + } + + phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT_2500; + phy->reset_delay_us = 100; + + ctrl_ext = rd32(IGC_CTRL_EXT); + + /* set lan id */ + hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >> + IGC_STATUS_FUNC_SHIFT; + + /* Make sure the PHY is in a good state. Several people have reported + * firmware leaving the PHY's page select register set to something + * other than the default of zero, which causes the PHY ID read to + * access something other than the intended register. + */ + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + hw_dbg("Error resetting the PHY.\n"); + goto out; + } + + ret_val = igc_get_phy_id_base(hw); + if (ret_val) + return ret_val; + + igc_check_for_link_base(hw); + + /* Verify phy id and set remaining function pointers */ + switch (phy->id) { + case I225_I_PHY_ID: + phy->type = igc_phy_i225; + break; + default: + ret_val = -IGC_ERR_PHY; + goto out; + } + +out: + return ret_val; +} + +static s32 igc_get_invariants_base(struct igc_hw *hw) +{ + struct igc_mac_info *mac = &hw->mac; + u32 link_mode = 0; + u32 ctrl_ext = 0; + s32 ret_val = 0; + + switch (hw->device_id) { + case IGC_DEV_ID_I225_LM: + case IGC_DEV_ID_I225_V: + mac->type = igc_i225; + break; + default: + return -IGC_ERR_MAC_INIT; + } + + hw->phy.media_type = igc_media_type_copper; + + ctrl_ext = rd32(IGC_CTRL_EXT); + link_mode = ctrl_ext & IGC_CTRL_EXT_LINK_MODE_MASK; + + /* mac initialization and operations */ + ret_val = igc_init_mac_params_base(hw); + if (ret_val) + goto out; + + /* NVM initialization */ + ret_val = igc_init_nvm_params_base(hw); + switch (hw->mac.type) { + case igc_i225: + ret_val = igc_init_nvm_params_i225(hw); + break; + default: + break; + } + + /* setup PHY parameters */ + ret_val = igc_init_phy_params_base(hw); + if (ret_val) + goto out; + +out: + return ret_val; +} + +/** + * igc_acquire_phy_base - Acquire rights to access PHY + * @hw: pointer to the HW structure + * + * Acquire access rights to the correct PHY. This is a + * function pointer entry point called by the api module. + */ +static s32 igc_acquire_phy_base(struct igc_hw *hw) +{ + u16 mask = IGC_SWFW_PHY0_SM; + + return hw->mac.ops.acquire_swfw_sync(hw, mask); +} + +/** + * igc_release_phy_base - Release rights to access PHY + * @hw: pointer to the HW structure + * + * A wrapper to release access rights to the correct PHY. This is a + * function pointer entry point called by the api module. 
+ */ +static void igc_release_phy_base(struct igc_hw *hw) +{ + u16 mask = IGC_SWFW_PHY0_SM; + + hw->mac.ops.release_swfw_sync(hw, mask); +} + +/** + * igc_get_link_up_info_base - Get link speed/duplex info + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * This is a wrapper function, if using the serial gigabit media independent + * interface, use PCS to retrieve the link speed and duplex information. + * Otherwise, use the generic function to get the link speed and duplex info. + */ +static s32 igc_get_link_up_info_base(struct igc_hw *hw, u16 *speed, + u16 *duplex) +{ + s32 ret_val; + + ret_val = igc_get_speed_and_duplex_copper(hw, speed, duplex); + + return ret_val; +} + +/** + * igc_init_hw_base - Initialize hardware + * @hw: pointer to the HW structure + * + * This inits the hardware readying it for operation. + */ +static s32 igc_init_hw_base(struct igc_hw *hw) +{ + struct igc_mac_info *mac = &hw->mac; + u16 i, rar_count = mac->rar_entry_count; + s32 ret_val = 0; + + /* Setup the receive address */ + igc_init_rx_addrs(hw, rar_count); + + /* Zero out the Multicast HASH table */ + hw_dbg("Zeroing the MTA\n"); + for (i = 0; i < mac->mta_reg_count; i++) + array_wr32(IGC_MTA, i, 0); + + /* Zero out the Unicast HASH table */ + hw_dbg("Zeroing the UTA\n"); + for (i = 0; i < mac->uta_reg_count; i++) + array_wr32(IGC_UTA, i, 0); + + /* Setup link and flow control */ + ret_val = igc_setup_link(hw); + + /* Clear all of the statistics registers (clear on read). It is + * important that we do this after we have tried to establish link + * because the symbol error count will increment wildly if there + * is no link. + */ + igc_clear_hw_cntrs_base(hw); + + return ret_val; +} + +/** + * igc_read_mac_addr_base - Read device MAC address + * @hw: pointer to the HW structure + */ +static s32 igc_read_mac_addr_base(struct igc_hw *hw) +{ + s32 ret_val = 0; + + ret_val = igc_read_mac_addr(hw); + + return ret_val; +} + +/** + * igc_power_down_phy_copper_base - Remove link during PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, remove the link. + */ +void igc_power_down_phy_copper_base(struct igc_hw *hw) +{ + /* If the management interface is not enabled, then power down */ + if (!(igc_enable_mng_pass_thru(hw) || igc_check_reset_block(hw))) + igc_power_down_phy_copper(hw); +} + +/** + * igc_rx_fifo_flush_base - Clean rx fifo after Rx enable + * @hw: pointer to the HW structure + * + * After Rx enable, if manageability is enabled then there is likely some + * bad data at the start of the fifo and possibly in the DMA fifo. This + * function clears the fifos and flushes any packets that came in as rx was + * being enabled. 
+ */ +void igc_rx_fifo_flush_base(struct igc_hw *hw) +{ + u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; + int i, ms_wait; + + /* disable IPv6 options as per hardware errata */ + rfctl = rd32(IGC_RFCTL); + rfctl |= IGC_RFCTL_IPV6_EX_DIS; + wr32(IGC_RFCTL, rfctl); + + if (!(rd32(IGC_MANC) & IGC_MANC_RCV_TCO_EN)) + return; + + /* Disable all Rx queues */ + for (i = 0; i < 4; i++) { + rxdctl[i] = rd32(IGC_RXDCTL(i)); + wr32(IGC_RXDCTL(i), + rxdctl[i] & ~IGC_RXDCTL_QUEUE_ENABLE); + } + /* Poll all queues to verify they have shut down */ + for (ms_wait = 0; ms_wait < 10; ms_wait++) { + usleep_range(1000, 2000); + rx_enabled = 0; + for (i = 0; i < 4; i++) + rx_enabled |= rd32(IGC_RXDCTL(i)); + if (!(rx_enabled & IGC_RXDCTL_QUEUE_ENABLE)) + break; + } + + if (ms_wait == 10) + pr_debug("Queue disable timed out after 10ms\n"); + + /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all + * incoming packets are rejected. Set enable and wait 2ms so that + * any packet that was coming in as RCTL.EN was set is flushed + */ + wr32(IGC_RFCTL, rfctl & ~IGC_RFCTL_LEF); + + rlpml = rd32(IGC_RLPML); + wr32(IGC_RLPML, 0); + + rctl = rd32(IGC_RCTL); + temp_rctl = rctl & ~(IGC_RCTL_EN | IGC_RCTL_SBP); + temp_rctl |= IGC_RCTL_LPE; + + wr32(IGC_RCTL, temp_rctl); + wr32(IGC_RCTL, temp_rctl | IGC_RCTL_EN); + wrfl(); + usleep_range(2000, 3000); + + /* Enable Rx queues that were previously enabled and restore our + * previous state + */ + for (i = 0; i < 4; i++) + wr32(IGC_RXDCTL(i), rxdctl[i]); + wr32(IGC_RCTL, rctl); + wrfl(); + + wr32(IGC_RLPML, rlpml); + wr32(IGC_RFCTL, rfctl); + + /* Flush receive errors generated by workaround */ + rd32(IGC_ROC); + rd32(IGC_RNBC); + rd32(IGC_MPC); +} + +static struct igc_mac_operations igc_mac_ops_base = { + .init_hw = igc_init_hw_base, + .check_for_link = igc_check_for_link_base, + .rar_set = igc_rar_set, + .read_mac_addr = igc_read_mac_addr_base, + .get_speed_and_duplex = igc_get_link_up_info_base, +}; + +static const struct igc_phy_operations igc_phy_ops_base = { + .acquire = igc_acquire_phy_base, + .release = igc_release_phy_base, + .reset = igc_phy_hw_reset, + .read_reg = igc_read_phy_reg_gpy, + .write_reg = igc_write_phy_reg_gpy, +}; + +const struct igc_info igc_base_info = { + .get_invariants = igc_get_invariants_base, + .mac_ops = &igc_mac_ops_base, + .phy_ops = &igc_phy_ops_base, +}; diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h new file mode 100644 index 000000000000..35588fa7b8c5 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_BASE_H +#define _IGC_BASE_H + +/* forward declaration */ +void igc_rx_fifo_flush_base(struct igc_hw *hw); +void igc_power_down_phy_copper_base(struct igc_hw *hw); + +/* Transmit Descriptor - Advanced */ +union igc_adv_tx_desc { + struct { + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; + } read; + struct { + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; + } wb; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define IGC_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ +#define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ +#define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ +#define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ +#define IGC_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ 
+#define IGC_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ +#define IGC_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ +#define IGC_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define IGC_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +#define IGC_RAR_ENTRIES 16 + +struct igc_adv_data_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + u32 data; + struct { + u32 datalen:16; /* Data buffer length */ + u32 rsvd:4; + u32 dtyp:4; /* Descriptor type */ + u32 dcmd:8; /* Descriptor command */ + } config; + } lower; + union { + u32 data; + struct { + u32 status:4; /* Descriptor status */ + u32 idx:4; + u32 popts:6; /* Packet Options */ + u32 paylen:18; /* Payload length */ + } options; + } upper; +}; + +/* Receive Descriptor - Advanced */ +union igc_adv_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + } read; + struct { + struct { + union { + __le32 data; + struct { + __le16 pkt_info; /*RSS type, Pkt type*/ + /* Split Header, header buffer len */ + __le16 hdr_info; + } hs_rss; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +/* Adv Transmit Descriptor Config Masks */ +#define IGC_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +/* Additional Transmit Descriptor Control definitions */ +#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ + +/* Additional Receive Descriptor Control definitions */ +#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ + +/* SRRCTL bit definitions */ +#define IGC_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ +#define IGC_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ +#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 + +#endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h new file mode 100644 index 000000000000..8740754ea1fd --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -0,0 +1,389 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_DEFINES_H_ +#define _IGC_DEFINES_H_ + +#define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ + +/* PCI Bus Info */ +#define PCIE_DEVICE_CONTROL2 0x28 +#define PCIE_DEVICE_CONTROL2_16ms 0x0005 + +/* Physical Func Reset Done Indication */ +#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 + +/* Loop limit on how long we wait for auto-negotiation to complete */ +#define COPPER_LINK_UP_LIMIT 10 +#define PHY_AUTO_NEG_LIMIT 45 +#define PHY_FORCE_LIMIT 20 + +/* Number of 100 microseconds we wait for PCI Express master disable */ +#define MASTER_DISABLE_TIMEOUT 800 +/*Blocks new Master requests */ +#define IGC_CTRL_GIO_MASTER_DISABLE 0x00000004 +/* Status of Master requests. */ +#define IGC_STATUS_GIO_MASTER_ENABLE 0x00080000 + +/* PCI Express Control */ +#define IGC_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define IGC_GCR_CMPL_TMOUT_10ms 0x00001000 +#define IGC_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define IGC_GCR_CAP_VER2 0x00040000 + +/* Receive Address + * Number of high/low register pairs in the RAR. 
The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. + * Technically, we have 16 spots. However, we reserve one of these spots + * (RAR[15]) for our directed address used by controllers with + * manageability enabled, allowing us room for 15 multicast addresses. + */ +#define IGC_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define IGC_RAH_POOL_1 0x00040000 +#define IGC_RAL_MAC_ADDR_LEN 4 +#define IGC_RAH_MAC_ADDR_LEN 2 + +/* Error Codes */ +#define IGC_SUCCESS 0 +#define IGC_ERR_NVM 1 +#define IGC_ERR_PHY 2 +#define IGC_ERR_CONFIG 3 +#define IGC_ERR_PARAM 4 +#define IGC_ERR_MAC_INIT 5 +#define IGC_ERR_RESET 9 +#define IGC_ERR_MASTER_REQUESTS_PENDING 10 +#define IGC_ERR_BLK_PHY_RESET 12 +#define IGC_ERR_SWFW_SYNC 13 + +/* Device Control */ +#define IGC_CTRL_RST 0x04000000 /* Global reset */ + +#define IGC_CTRL_PHY_RST 0x80000000 /* PHY Reset */ +#define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define IGC_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define IGC_CTRL_FRCDPX 0x00001000 /* Force Duplex */ + +#define IGC_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ +#define IGC_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ + +#define IGC_CONNSW_AUTOSENSE_EN 0x1 + +/* PBA constants */ +#define IGC_PBA_34K 0x0022 + +/* SW Semaphore Register */ +#define IGC_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ +#define IGC_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ + +/* SWFW_SYNC Definitions */ +#define IGC_SWFW_EEP_SM 0x1 +#define IGC_SWFW_PHY0_SM 0x2 + +/* Autoneg Advertisement Register */ +#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ +#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ +#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ +#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ +#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ +#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ + +/* Link Partner Ability Register (Base Page) */ +#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ +#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ + +/* 1000BASE-T Control Register */ +#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ +#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ +#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ + +/* 1000BASE-T Status Register */ +#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ +#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ + +/* PHY GPY 211 registers */ +#define STANDARD_AN_REG_MASK 0x0007 /* MMD */ +#define ANEG_MULTIGBT_AN_CTRL 0x0020 /* MULTI GBT AN Control Register */ +#define MMD_DEVADDR_SHIFT 16 /* Shift MMD to higher bits */ +#define CR_2500T_FD_CAPS 0x0080 /* Advertise 2500T FD capability */ + +/* NVM Control */ +/* Number of milliseconds for NVM auto read done after MAC reset. 
*/ +#define AUTO_READ_DONE_TIMEOUT 10 +#define IGC_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ +#define IGC_EECD_REQ 0x00000040 /* NVM Access Request */ +#define IGC_EECD_GNT 0x00000080 /* NVM Access Grant */ +/* NVM Addressing bits based on type 0=small, 1=large */ +#define IGC_EECD_ADDR_BITS 0x00000400 +#define IGC_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ +#define IGC_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ +#define IGC_EECD_SIZE_EX_SHIFT 11 +#define IGC_EECD_FLUPD_I225 0x00800000 /* Update FLASH */ +#define IGC_EECD_FLUDONE_I225 0x04000000 /* Update FLASH done*/ +#define IGC_EECD_FLASH_DETECTED_I225 0x00080000 /* FLASH detected */ +#define IGC_FLUDONE_ATTEMPTS 20000 +#define IGC_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ + +/* Offset to data in NVM read/write registers */ +#define IGC_NVM_RW_REG_DATA 16 +#define IGC_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ +#define IGC_NVM_RW_REG_START 1 /* Start operation */ +#define IGC_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define IGC_NVM_POLL_READ 0 /* Flag for polling for read complete */ + +/* NVM Word Offsets */ +#define NVM_CHECKSUM_REG 0x003F + +/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ +#define NVM_SUM 0xBABA + +#define NVM_PBA_OFFSET_0 8 +#define NVM_PBA_OFFSET_1 9 +#define NVM_RESERVED_WORD 0xFFFF +#define NVM_PBA_PTR_GUARD 0xFAFA +#define NVM_WORD_SIZE_BASE_SHIFT 6 + +/* Collision related configuration parameters */ +#define IGC_COLLISION_THRESHOLD 15 +#define IGC_CT_SHIFT 4 +#define IGC_COLLISION_DISTANCE 63 +#define IGC_COLD_SHIFT 12 + +/* Device Status */ +#define IGC_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ +#define IGC_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define IGC_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define IGC_STATUS_FUNC_SHIFT 2 +#define IGC_STATUS_FUNC_1 0x00000004 /* Function 1 */ +#define IGC_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define IGC_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define IGC_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define IGC_STATUS_SPEED_2500 0x00400000 /* Speed 2.5Gb/s */ + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + +/* 1Gbps and 2.5Gbps half duplex is not supported, nor spec-compliant. */ +#define ADVERTISE_10_HALF 0x0001 +#define ADVERTISE_10_FULL 0x0002 +#define ADVERTISE_100_HALF 0x0004 +#define ADVERTISE_100_FULL 0x0008 +#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ +#define ADVERTISE_1000_FULL 0x0020 +#define ADVERTISE_2500_HALF 0x0040 /* Not used, just FYI */ +#define ADVERTISE_2500_FULL 0x0080 + +#define IGC_ALL_SPEED_DUPLEX_2500 ( \ + ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ + ADVERTISE_100_FULL | ADVERTISE_1000_FULL | ADVERTISE_2500_FULL) + +#define AUTONEG_ADVERTISE_SPEED_DEFAULT_2500 IGC_ALL_SPEED_DUPLEX_2500 + +/* Interrupt Cause Read */ +#define IGC_ICR_TXDW BIT(0) /* Transmit desc written back */ +#define IGC_ICR_TXQE BIT(1) /* Transmit Queue empty */ +#define IGC_ICR_LSC BIT(2) /* Link Status Change */ +#define IGC_ICR_RXSEQ BIT(3) /* Rx sequence error */ +#define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. 
threshold (0) */
+#define IGC_ICR_RXO		BIT(6)	/* Rx overrun */
+#define IGC_ICR_RXT0		BIT(7)	/* Rx timer intr (ring 0) */
+#define IGC_ICR_DRSTA		BIT(30)	/* Device Reset Asserted */
+
+/* If this bit asserted, the driver should claim the interrupt */
+#define IGC_ICR_INT_ASSERTED	BIT(31)
+
+#define IGC_ICS_RXT0		IGC_ICR_RXT0 /* Rx timer intr */
+
+#define IMS_ENABLE_MASK ( \
+	IGC_IMS_RXT0 |   \
+	IGC_IMS_TXDW |   \
+	IGC_IMS_RXDMT0 | \
+	IGC_IMS_RXSEQ |  \
+	IGC_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define IGC_IMS_TXDW		IGC_ICR_TXDW	/* Tx desc written back */
+#define IGC_IMS_RXSEQ		IGC_ICR_RXSEQ	/* Rx sequence error */
+#define IGC_IMS_LSC		IGC_ICR_LSC	/* Link Status Change */
+#define IGC_IMS_DOUTSYNC	IGC_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define IGC_IMS_DRSTA		IGC_ICR_DRSTA	/* Device Reset Asserted */
+#define IGC_IMS_RXT0		IGC_ICR_RXT0	/* Rx timer intr */
+#define IGC_IMS_RXDMT0		IGC_ICR_RXDMT0	/* Rx desc min. threshold */
+
+#define IGC_QVECTOR_MASK	0x7FFC	/* Q-vector mask */
+#define IGC_ITR_VAL_MASK	0x04	/* ITR value mask */
+
+/* Interrupt Cause Set */
+#define IGC_ICS_LSC		IGC_ICR_LSC	/* Link Status Change */
+#define IGC_ICS_RXDMT0		IGC_ICR_RXDMT0	/* rx desc min. threshold */
+#define IGC_ICS_DRSTA		IGC_ICR_DRSTA	/* Device Reset Asserted */
+
+#define IGC_ICR_DOUTSYNC	0x10000000 /* NIC DMA out of sync */
+#define IGC_EITR_CNT_IGNR	0x80000000 /* Don't reset counters on write */
+#define IGC_IVAR_VALID		0x80
+#define IGC_GPIE_NSICR		0x00000001
+#define IGC_GPIE_MSIX_MODE	0x00000010
+#define IGC_GPIE_EIAME		0x40000000
+#define IGC_GPIE_PBA		0x80000000
+
+/* Transmit Descriptor bit definitions */
+#define IGC_TXD_DTYP_D		0x00100000 /* Data Descriptor */
+#define IGC_TXD_DTYP_C		0x00000000 /* Context Descriptor */
+#define IGC_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define IGC_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define IGC_TXD_CMD_EOP		0x01000000 /* End of Packet */
+#define IGC_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_TXD_CMD_IC		0x04000000 /* Insert Checksum */
+#define IGC_TXD_CMD_RS		0x08000000 /* Report Status */
+#define IGC_TXD_CMD_RPS		0x10000000 /* Report Packet Sent */
+#define IGC_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define IGC_TXD_CMD_VLE		0x40000000 /* Add VLAN tag */
+#define IGC_TXD_CMD_IDE		0x80000000 /* Enable Tidv register */
+#define IGC_TXD_STAT_DD		0x00000001 /* Descriptor Done */
+#define IGC_TXD_STAT_EC		0x00000002 /* Excess Collisions */
+#define IGC_TXD_STAT_LC		0x00000004 /* Late Collisions */
+#define IGC_TXD_STAT_TU		0x00000008 /* Transmit underrun */
+#define IGC_TXD_CMD_TCP		0x01000000 /* TCP packet */
+#define IGC_TXD_CMD_IP		0x02000000 /* IP packet */
+#define IGC_TXD_CMD_TSE		0x04000000 /* TCP Seg enable */
+#define IGC_TXD_STAT_TC		0x00000004 /* Tx Underrun */
+#define IGC_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define IGC_TCTL_EN		0x00000002 /* enable Tx */
+#define IGC_TCTL_PSP		0x00000008 /* pad short packets */
+#define IGC_TCTL_CT		0x00000ff0 /* collision threshold */
+#define IGC_TCTL_COLD		0x003ff000 /* collision distance */
+#define IGC_TCTL_RTLC		0x01000000 /* Re-transmit on late collision */
+#define IGC_TCTL_MULR		0x10000000 /* Multiple request support */
+
+#define IGC_CT_SHIFT		4
+#define IGC_COLLISION_THRESHOLD	15
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW	0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH	0x00000100
+#define FLOW_CONTROL_TYPE		0x8808
+/* Enable XON frame transmission */
+#define IGC_FCRTL_XONE		0x80000000
+
+/* 
Management Control */ +#define IGC_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ +#define IGC_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ + +/* Receive Control */ +#define IGC_RCTL_RST 0x00000001 /* Software reset */ +#define IGC_RCTL_EN 0x00000002 /* enable */ +#define IGC_RCTL_SBP 0x00000004 /* store bad packet */ +#define IGC_RCTL_UPE 0x00000008 /* unicast promisc enable */ +#define IGC_RCTL_MPE 0x00000010 /* multicast promisc enable */ +#define IGC_RCTL_LPE 0x00000020 /* long packet enable */ +#define IGC_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define IGC_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ + +#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ +#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ + +/* Receive Descriptor bit definitions */ +#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ + +#define IGC_RXDEXT_STATERR_CE 0x01000000 +#define IGC_RXDEXT_STATERR_SE 0x02000000 +#define IGC_RXDEXT_STATERR_SEQ 0x04000000 +#define IGC_RXDEXT_STATERR_CXE 0x10000000 +#define IGC_RXDEXT_STATERR_TCPE 0x20000000 +#define IGC_RXDEXT_STATERR_IPE 0x40000000 +#define IGC_RXDEXT_STATERR_RXE 0x80000000 + +/* Same mask, but for extended and packet split descriptors */ +#define IGC_RXDEXT_ERR_FRAME_ERR_MASK ( \ + IGC_RXDEXT_STATERR_CE | \ + IGC_RXDEXT_STATERR_SE | \ + IGC_RXDEXT_STATERR_SEQ | \ + IGC_RXDEXT_STATERR_CXE | \ + IGC_RXDEXT_STATERR_RXE) + +/* Header split receive */ +#define IGC_RFCTL_IPV6_EX_DIS 0x00010000 +#define IGC_RFCTL_LEF 0x00040000 + +#define IGC_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ + +#define IGC_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define IGC_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define IGC_RCTL_DPF 0x00400000 /* discard pause frames */ +#define IGC_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define IGC_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ + +#define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ +#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ + +/* GPY211 - I225 defines */ +#define GPY_MMD_MASK 0xFFFF0000 +#define GPY_MMD_SHIFT 16 +#define GPY_REG_MASK 0x0000FFFF + +#define IGC_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ + +/* MAC definitions */ +#define IGC_FACTPS_MNGCG 0x20000000 +#define IGC_FWSM_MODE_MASK 0xE +#define IGC_FWSM_MODE_SHIFT 1 + +/* Management Control */ +#define IGC_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ +#define IGC_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ + +/* PHY */ +#define PHY_REVISION_MASK 0xFFFFFFF0 +#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ +#define IGC_GEN_POLL_TIMEOUT 1920 + +/* PHY Control Register */ +#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ +#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ +#define MII_CR_POWER_DOWN 0x0800 /* Power down */ +#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ +#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ +#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ +#define MII_CR_SPEED_1000 0x0040 +#define MII_CR_SPEED_100 0x2000 +#define MII_CR_SPEED_10 0x0000 + +/* PHY Status Register */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ + +/* PHY 1000 MII Register/Bit Definitions */ +/* PHY Registers defined by IEEE */ +#define PHY_CONTROL 0x00 /* Control Register */ +#define PHY_STATUS 0x01 /* Status Register */ +#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ +#define PHY_ID2 
0x03 /* Phy Id Reg (word 2) */ +#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ +#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ +#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ +#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ + +/* Bit definitions for valid PHY IDs. I = Integrated E = External */ +#define I225_I_PHY_ID 0x67C9DC00 + +/* MDI Control */ +#define IGC_MDIC_DATA_MASK 0x0000FFFF +#define IGC_MDIC_REG_MASK 0x001F0000 +#define IGC_MDIC_REG_SHIFT 16 +#define IGC_MDIC_PHY_MASK 0x03E00000 +#define IGC_MDIC_PHY_SHIFT 21 +#define IGC_MDIC_OP_WRITE 0x04000000 +#define IGC_MDIC_OP_READ 0x08000000 +#define IGC_MDIC_READY 0x10000000 +#define IGC_MDIC_INT_EN 0x20000000 +#define IGC_MDIC_ERROR 0x40000000 +#define IGC_MDIC_DEST 0x80000000 + +#define IGC_N0_QUEUE -1 + +#endif /* _IGC_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h new file mode 100644 index 000000000000..c50414f48f0d --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_HW_H_ +#define _IGC_HW_H_ + +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/netdevice.h> + +#include "igc_regs.h" +#include "igc_defines.h" +#include "igc_mac.h" +#include "igc_phy.h" +#include "igc_nvm.h" +#include "igc_i225.h" +#include "igc_base.h" + +#define IGC_DEV_ID_I225_LM 0x15F2 +#define IGC_DEV_ID_I225_V 0x15F3 + +#define IGC_FUNC_0 0 + +/* Function pointers for the MAC. */ +struct igc_mac_operations { + s32 (*check_for_link)(struct igc_hw *hw); + s32 (*reset_hw)(struct igc_hw *hw); + s32 (*init_hw)(struct igc_hw *hw); + s32 (*setup_physical_interface)(struct igc_hw *hw); + void (*rar_set)(struct igc_hw *hw, u8 *address, u32 index); + s32 (*read_mac_addr)(struct igc_hw *hw); + s32 (*get_speed_and_duplex)(struct igc_hw *hw, u16 *speed, + u16 *duplex); + s32 (*acquire_swfw_sync)(struct igc_hw *hw, u16 mask); + void (*release_swfw_sync)(struct igc_hw *hw, u16 mask); +}; + +enum igc_mac_type { + igc_undefined = 0, + igc_i225, + igc_num_macs /* List is 1-based, so subtract 1 for true count. 
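
The igc_mac_operations and igc_info definitions above implement a vtable pattern: the probe path picks an igc_info for the device ID and binds its function pointers into the hw structure, so common code can call hw->mac.ops.* without branching on the MAC type. The following is a minimal, self-contained model of that dispatch; the struct names and the two stub functions are illustrative, not taken from the driver.

#include <stdio.h>

struct hw;

struct mac_ops {
	int (*init_hw)(struct hw *hw);
	int (*check_for_link)(struct hw *hw);
};

struct hw {
	const struct mac_ops *ops;	/* bound once at probe time */
};

static int base_init_hw(struct hw *hw)      { (void)hw; puts("init"); return 0; }
static int base_check_link(struct hw *hw)   { (void)hw; puts("link"); return 0; }

static const struct mac_ops base_ops = {
	.init_hw        = base_init_hw,
	.check_for_link = base_check_link,
};

int main(void)
{
	struct hw hw = { .ops = &base_ops };	/* from the per-board info table */

	hw.ops->init_hw(&hw);		/* generic code, family-specific body */
	hw.ops->check_for_link(&hw);
	return 0;
}
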
*/ +}; + +enum igc_phy_type { + igc_phy_unknown = 0, + igc_phy_none, + igc_phy_i225, +}; + +enum igc_media_type { + igc_media_type_unknown = 0, + igc_media_type_copper = 1, + igc_num_media_types +}; + +enum igc_nvm_type { + igc_nvm_unknown = 0, + igc_nvm_flash_hw, + igc_nvm_invm, +}; + +struct igc_info { + s32 (*get_invariants)(struct igc_hw *hw); + struct igc_mac_operations *mac_ops; + const struct igc_phy_operations *phy_ops; + struct igc_nvm_operations *nvm_ops; +}; + +extern const struct igc_info igc_base_info; + +struct igc_mac_info { + struct igc_mac_operations ops; + + u8 addr[ETH_ALEN]; + u8 perm_addr[ETH_ALEN]; + + enum igc_mac_type type; + + u32 collision_delta; + u32 ledctl_default; + u32 ledctl_mode1; + u32 ledctl_mode2; + u32 mc_filter_type; + u32 tx_packet_delta; + u32 txcw; + + u16 mta_reg_count; + u16 uta_reg_count; + + u16 rar_entry_count; + + u8 forced_speed_duplex; + + bool adaptive_ifs; + bool has_fwsm; + bool asf_firmware_present; + bool arc_subsystem_valid; + + bool autoneg; + bool autoneg_failed; + bool get_link_status; +}; + +struct igc_nvm_operations { + s32 (*acquire)(struct igc_hw *hw); + s32 (*read)(struct igc_hw *hw, u16 offset, u16 i, u16 *data); + void (*release)(struct igc_hw *hw); + s32 (*write)(struct igc_hw *hw, u16 offset, u16 i, u16 *data); + s32 (*update)(struct igc_hw *hw); + s32 (*validate)(struct igc_hw *hw); + s32 (*valid_led_default)(struct igc_hw *hw, u16 *data); +}; + +struct igc_phy_operations { + s32 (*acquire)(struct igc_hw *hw); + s32 (*check_polarity)(struct igc_hw *hw); + s32 (*check_reset_block)(struct igc_hw *hw); + s32 (*force_speed_duplex)(struct igc_hw *hw); + s32 (*get_cfg_done)(struct igc_hw *hw); + s32 (*get_cable_length)(struct igc_hw *hw); + s32 (*get_phy_info)(struct igc_hw *hw); + s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data); + void (*release)(struct igc_hw *hw); + s32 (*reset)(struct igc_hw *hw); + s32 (*write_reg)(struct igc_hw *hw, u32 address, u16 data); +}; + +struct igc_nvm_info { + struct igc_nvm_operations ops; + enum igc_nvm_type type; + + u32 flash_bank_size; + u32 flash_base_addr; + + u16 word_size; + u16 delay_usec; + u16 address_bits; + u16 opcode_bits; + u16 page_size; +}; + +struct igc_phy_info { + struct igc_phy_operations ops; + + enum igc_phy_type type; + + u32 addr; + u32 id; + u32 reset_delay_us; /* in usec */ + u32 revision; + + enum igc_media_type media_type; + + u16 autoneg_advertised; + u16 autoneg_mask; + u16 cable_length; + u16 max_cable_length; + u16 min_cable_length; + u16 pair_length[4]; + + u8 mdix; + + bool disable_polarity_correction; + bool is_mdix; + bool polarity_correction; + bool reset_disable; + bool speed_downgraded; + bool autoneg_wait_to_complete; +}; + +struct igc_bus_info { + u16 func; + u16 pci_cmd_word; +}; + +enum igc_fc_mode { + igc_fc_none = 0, + igc_fc_rx_pause, + igc_fc_tx_pause, + igc_fc_full, + igc_fc_default = 0xFF +}; + +struct igc_fc_info { + u32 high_water; /* Flow control high-water mark */ + u32 low_water; /* Flow control low-water mark */ + u16 pause_time; /* Flow control pause timer */ + bool send_xon; /* Flow control send XON */ + bool strict_ieee; /* Strict IEEE mode */ + enum igc_fc_mode current_mode; /* Type of flow control */ + enum igc_fc_mode requested_mode; +}; + +struct igc_dev_spec_base { + bool global_device_reset; + bool eee_disable; + bool clear_semaphore_once; + bool module_plugged; + u8 media_port; + bool mas_capable; +}; + +struct igc_hw { + void *back; + + u8 __iomem *hw_addr; + unsigned long io_base; + + struct igc_mac_info mac; + struct 
igc_fc_info fc;
+	struct igc_nvm_info nvm;
+	struct igc_phy_info phy;
+
+	struct igc_bus_info bus;
+
+	union {
+		struct igc_dev_spec_base	_base;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8 revision_id;
+};
+
+/* Statistics counters collected by the MAC */
+struct igc_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+	u64 cbtmpc;
+	u64 htdpmc;
+	u64 cbrdpc;
+	u64 cbrmpc;
+	u64 rpthc;
+	u64 hgptc;
+	u64 htcbdpc;
+	u64 hgorc;
+	u64 hgotc;
+	u64 lenerrs;
+	u64 scvpc;
+	u64 hrmpc;
+	u64 doosync;
+	u64 o2bgptc;
+	u64 o2bspc;
+	u64 b2ospc;
+	u64 b2ogprc;
+};
+
+struct net_device *igc_get_hw_dev(struct igc_hw *hw);
+#define hw_dbg(format, arg...) \
+	netdev_dbg(igc_get_hw_dev(hw), format, ##arg)
+
+s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+
+#endif /* _IGC_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
new file mode 100644
index 000000000000..c25f555aaf82
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+
+/**
+ * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return success if the access grant bit is set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+static s32 igc_acquire_nvm_i225(struct igc_hw *hw)
+{
+	return igc_acquire_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_release_nvm_i225 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
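
The igc_hw_stats fields above are u64 even though the MAC counters are 32-bit registers, because most counters clear on read and the driver accumulates successive reads into software totals. Below is a small userspace model of that accumulate-on-read pattern; the fake_crcerrs variable stands in for an MMIO register that the driver would read with rd32().

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_crcerrs = 7;	/* stand-in for the CRCERRS register */

static uint32_t read_clear(uint32_t *reg)
{
	uint32_t v = *reg;

	*reg = 0;	/* hardware clears the counter on read */
	return v;
}

int main(void)
{
	uint64_t crcerrs = 0;	/* wide software accumulator */

	crcerrs += read_clear(&fake_crcerrs);
	fake_crcerrs = 3;	/* more errors arrive between polls */
	crcerrs += read_clear(&fake_crcerrs);
	printf("total CRC errors: %llu\n", (unsigned long long)crcerrs);
	return 0;
}
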
+ */ +static void igc_release_nvm_i225(struct igc_hw *hw) +{ + igc_release_swfw_sync_i225(hw, IGC_SWFW_EEP_SM); +} + +/** + * igc_get_hw_semaphore_i225 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + */ +static s32 igc_get_hw_semaphore_i225(struct igc_hw *hw) +{ + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + u32 swsm; + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = rd32(IGC_SWSM); + if (!(swsm & IGC_SWSM_SMBI)) + break; + + usleep_range(500, 600); + i++; + } + + if (i == timeout) { + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._base.clear_semaphore_once) { + hw->dev_spec._base.clear_semaphore_once = false; + igc_put_hw_semaphore(hw); + for (i = 0; i < timeout; i++) { + swsm = rd32(IGC_SWSM); + if (!(swsm & IGC_SWSM_SMBI)) + break; + + usleep_range(500, 600); + } + } + + /* If we do not have the semaphore here, we have to give up. */ + if (i == timeout) { + hw_dbg("Driver can't access device - SMBI bit is set.\n"); + return -IGC_ERR_NVM; + } + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = rd32(IGC_SWSM); + wr32(IGC_SWSM, swsm | IGC_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (rd32(IGC_SWSM) & IGC_SWSM_SWESMBI) + break; + + usleep_range(500, 600); + } + + if (i == timeout) { + /* Release semaphores */ + igc_put_hw_semaphore(hw); + hw_dbg("Driver can't access the NVM\n"); + return -IGC_ERR_NVM; + } + + return 0; +} + +/** + * igc_acquire_swfw_sync_i225 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + */ +s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask) +{ + s32 i = 0, timeout = 200; + u32 fwmask = mask << 16; + u32 swmask = mask; + s32 ret_val = 0; + u32 swfw_sync; + + while (i < timeout) { + if (igc_get_hw_semaphore_i225(hw)) { + ret_val = -IGC_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = rd32(IGC_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* Firmware currently using resource (fwmask) */ + igc_put_hw_semaphore(hw); + mdelay(5); + i++; + } + + if (i == timeout) { + hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -IGC_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + wr32(IGC_SW_FW_SYNC, swfw_sync); + + igc_put_hw_semaphore(hw); +out: + return ret_val; +} + +/** + * igc_release_swfw_sync_i225 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + */ +void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask) +{ + u32 swfw_sync; + + while (igc_get_hw_semaphore_i225(hw)) + ; /* Empty */ + + swfw_sync = rd32(IGC_SW_FW_SYNC); + swfw_sync &= ~mask; + wr32(IGC_SW_FW_SYNC, swfw_sync); + + igc_put_hw_semaphore(hw); +} + +/** + * igc_read_nvm_srrd_i225 - Reads Shadow Ram using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the Shadow Ram to read + * @words: number of words to read + * @data: word read from the Shadow Ram + * + * Reads a 16 bit word from the Shadow Ram using the EERD register. + * Uses necessary synchronization semaphores. 
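
The SWSM handshake above has two stages: wait for the software semaphore bit (SMBI) to clear, then set the firmware semaphore bit (SWESMBI) and read it back to confirm the write latched before firmware could claim it. The sketch below is a loose model of both stages with a plain variable in place of the register; the sleeps between polls and the clear-semaphore-once recovery path are left out.

#include <stdint.h>
#include <stdio.h>

#define SMBI	0x1u	/* software semaphore bit */
#define SWESMBI	0x2u	/* software/firmware semaphore bit */

static uint32_t swsm;	/* stand-in for the SWSM register */

static int get_hw_semaphore(int timeout)
{
	int i;

	/* Stage 1: wait for the SW semaphore to be free. */
	for (i = 0; i < timeout && (swsm & SMBI); i++)
		;	/* real code sleeps between polls */
	if (i == timeout)
		return -1;

	/* Stage 2: claim the FW semaphore and confirm the write latched. */
	for (i = 0; i < timeout; i++) {
		swsm |= SWESMBI;
		if (swsm & SWESMBI)	/* read back: did we win the race? */
			return 0;
	}
	swsm &= ~SMBI;	/* give the semaphore back on failure */
	return -1;
}

int main(void)
{
	printf("acquire: %s\n", get_hw_semaphore(100) ? "failed" : "ok");
	return 0;
}
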
+ */
+static s32 igc_read_nvm_srrd_i225(struct igc_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+			IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+		status = hw->nvm.ops.acquire(hw);
+		if (status)
+			break;
+
+		status = igc_read_nvm_eerd(hw, offset, count, data + i);
+		hw->nvm.ops.release(hw);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * igc_write_nvm_srwr - Write to Shadow Ram using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow Ram to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ * Writes data to Shadow Ram at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function, the
+ * Shadow Ram will most likely contain an invalid checksum.
+ */
+static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 attempts = 100000;
+	u32 i, k, eewr = 0;
+	s32 ret_val = -IGC_ERR_NVM;
+
+	/* A check for invalid values: offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+	    words == 0) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) |
+			(data[i] << IGC_NVM_RW_REG_DATA) |
+			IGC_NVM_RW_REG_START;
+
+		wr32(IGC_SRWR, eewr);
+
+		for (k = 0; k < attempts; k++) {
+			if (IGC_NVM_RW_REG_DONE &
+			    rd32(IGC_SRWR)) {
+				ret_val = 0;
+				break;
+			}
+			udelay(5);
+		}
+
+		if (ret_val) {
+			hw_dbg("Shadow RAM write EEWR timed out\n");
+			break;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_write_nvm_srwr_i225 - Write to Shadow RAM using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow RAM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ * Writes data to Shadow RAM at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function, the
+ * data will not be committed to FLASH and also Shadow RAM will most likely
+ * contain an invalid checksum.
+ *
+ * If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ * partially written.
+ */
+static s32 igc_write_nvm_srwr_i225(struct igc_hw *hw, u16 offset, u16 words,
+				   u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to write in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+ IGC_EERD_EEWR_MAX_COUNT : (words - i); + + status = hw->nvm.ops.acquire(hw); + if (status) + break; + + status = igc_write_nvm_srwr(hw, offset, count, data + i); + hw->nvm.ops.release(hw); + if (status) + break; + } + + return status; +} + +/** + * igc_validate_nvm_checksum_i225 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + */ +static s32 igc_validate_nvm_checksum_i225(struct igc_hw *hw) +{ + s32 (*read_op_ptr)(struct igc_hw *hw, u16 offset, u16 count, + u16 *data); + s32 status = 0; + + status = hw->nvm.ops.acquire(hw); + if (status) + goto out; + + /* Replace the read function with semaphore grabbing with + * the one that skips this for a while. + * We have semaphore taken already here. + */ + read_op_ptr = hw->nvm.ops.read; + hw->nvm.ops.read = igc_read_nvm_eerd; + + status = igc_validate_nvm_checksum(hw); + + /* Revert original read operation. */ + hw->nvm.ops.read = read_op_ptr; + + hw->nvm.ops.release(hw); + +out: + return status; +} + +/** + * igc_pool_flash_update_done_i225 - Pool FLUDONE status + * @hw: pointer to the HW structure + */ +static s32 igc_pool_flash_update_done_i225(struct igc_hw *hw) +{ + s32 ret_val = -IGC_ERR_NVM; + u32 i, reg; + + for (i = 0; i < IGC_FLUDONE_ATTEMPTS; i++) { + reg = rd32(IGC_EECD); + if (reg & IGC_EECD_FLUDONE_I225) { + ret_val = 0; + break; + } + udelay(5); + } + + return ret_val; +} + +/** + * igc_update_flash_i225 - Commit EEPROM to the flash + * @hw: pointer to the HW structure + */ +static s32 igc_update_flash_i225(struct igc_hw *hw) +{ + s32 ret_val = 0; + u32 flup; + + ret_val = igc_pool_flash_update_done_i225(hw); + if (ret_val == -IGC_ERR_NVM) { + hw_dbg("Flash update time out\n"); + goto out; + } + + flup = rd32(IGC_EECD) | IGC_EECD_FLUPD_I225; + wr32(IGC_EECD, flup); + + ret_val = igc_pool_flash_update_done_i225(hw); + if (ret_val) + hw_dbg("Flash update time out\n"); + else + hw_dbg("Flash update complete\n"); + +out: + return ret_val; +} + +/** + * igc_update_nvm_checksum_i225 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. Next commit EEPROM data onto the Flash. + */ +static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw) +{ + u16 checksum = 0; + s32 ret_val = 0; + u16 i, nvm_data; + + /* Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + ret_val = igc_read_nvm_eerd(hw, 0, 1, &nvm_data); + if (ret_val) { + hw_dbg("EEPROM read failed\n"); + goto out; + } + + ret_val = hw->nvm.ops.acquire(hw); + if (ret_val) + goto out; + + /* Do not use hw->nvm.ops.write, hw->nvm.ops.read + * because we do not want to take the synchronization + * semaphores twice here. 
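
Both Shadow RAM helpers above use the same burst pattern: split the transfer into chunks of at most IGC_EERD_EEWR_MAX_COUNT (512) words and take the semaphore per chunk, so firmware can claim the NVM between bursts. A compact standalone version of that loop follows; process() and the commented acquire/release calls are placeholders for the real EERD/EEWR access and semaphore operations.

#include <stdio.h>

#define MAX_COUNT 512	/* mirrors IGC_EERD_EEWR_MAX_COUNT */

static void process(int offset, int count)
{
	printf("burst: offset=%d count=%d\n", offset, count);
}

int main(void)
{
	int words = 1200, i;

	for (i = 0; i < words; i += MAX_COUNT) {
		int count = (words - i) > MAX_COUNT ? MAX_COUNT : (words - i);

		/* acquire();  take the semaphore only for this burst */
		process(i, count);
		/* release(); */
	}
	return 0;	/* prints bursts of 512, 512, 176 words */
}
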
+ */ + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = igc_read_nvm_eerd(hw, i, 1, &nvm_data); + if (ret_val) { + hw->nvm.ops.release(hw); + hw_dbg("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16)NVM_SUM - checksum; + ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, + &checksum); + if (ret_val) { + hw->nvm.ops.release(hw); + hw_dbg("NVM Write Error while updating checksum.\n"); + goto out; + } + + hw->nvm.ops.release(hw); + + ret_val = igc_update_flash_i225(hw); + +out: + return ret_val; +} + +/** + * igc_get_flash_presence_i225 - Check if flash device is detected + * @hw: pointer to the HW structure + */ +bool igc_get_flash_presence_i225(struct igc_hw *hw) +{ + bool ret_val = false; + u32 eec = 0; + + eec = rd32(IGC_EECD); + if (eec & IGC_EECD_FLASH_DETECTED_I225) + ret_val = true; + + return ret_val; +} + +/** + * igc_init_nvm_params_i225 - Init NVM func ptrs. + * @hw: pointer to the HW structure + */ +s32 igc_init_nvm_params_i225(struct igc_hw *hw) +{ + struct igc_nvm_info *nvm = &hw->nvm; + + nvm->ops.acquire = igc_acquire_nvm_i225; + nvm->ops.release = igc_release_nvm_i225; + + /* NVM Function Pointers */ + if (igc_get_flash_presence_i225(hw)) { + hw->nvm.type = igc_nvm_flash_hw; + nvm->ops.read = igc_read_nvm_srrd_i225; + nvm->ops.write = igc_write_nvm_srwr_i225; + nvm->ops.validate = igc_validate_nvm_checksum_i225; + nvm->ops.update = igc_update_nvm_checksum_i225; + } else { + hw->nvm.type = igc_nvm_invm; + nvm->ops.read = igc_read_nvm_eerd; + nvm->ops.write = NULL; + nvm->ops.validate = NULL; + nvm->ops.update = NULL; + } + return 0; +} diff --git a/drivers/net/ethernet/intel/igc/igc_i225.h b/drivers/net/ethernet/intel/igc/igc_i225.h new file mode 100644 index 000000000000..7b66e1f9c0e6 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_i225.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_I225_H_ +#define _IGC_I225_H_ + +s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask); +void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask); + +s32 igc_init_nvm_params_i225(struct igc_hw *hw); +bool igc_get_flash_presence_i225(struct igc_hw *hw); + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c new file mode 100644 index 000000000000..f7683d3ae47c --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -0,0 +1,806 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include <linux/pci.h> +#include <linux/delay.h> + +#include "igc_mac.h" +#include "igc_hw.h" + +/* forward declaration */ +static s32 igc_set_default_fc(struct igc_hw *hw); +static s32 igc_set_fc_watermarks(struct igc_hw *hw); + +/** + * igc_disable_pcie_master - Disables PCI-express master access + * @hw: pointer to the HW structure + * + * Returns 0 (0) if successful, else returns -10 + * (-IGC_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused + * the master requests to be disabled. + * + * Disables PCI-Express master access and verifies there are no pending + * requests. 
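
The validate/update pair above enforces a simple invariant: the first 64 NVM words must sum, modulo 2^16, to NVM_SUM (0xBABA), with the word at NVM_CHECKSUM_REG (0x3F) chosen to make that true. The arithmetic is small enough to show whole; the array below stands in for EERD reads, and the values in it are arbitrary demo data.

#include <stdint.h>
#include <stdio.h>

#define NVM_CHECKSUM_REG 0x3F
#define NVM_SUM 0xBABA

int main(void)
{
	uint16_t nvm[0x40] = { 0x1234, 0xABCD };	/* rest zero for the demo */
	uint16_t sum = 0;
	int i;

	/* Update path: sum words 0..0x3E, then write the balancing word. */
	for (i = 0; i < NVM_CHECKSUM_REG; i++)
		sum += nvm[i];
	nvm[NVM_CHECKSUM_REG] = (uint16_t)(NVM_SUM - sum);

	/* Validation path: the total over all 0x40 words must be 0xBABA. */
	sum += nvm[NVM_CHECKSUM_REG];
	printf("checksum word=0x%04X total=0x%04X\n",
	       (unsigned)nvm[NVM_CHECKSUM_REG], (unsigned)sum);
	return 0;
}
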
+ */
+s32 igc_disable_pcie_master(struct igc_hw *hw)
+{
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl |= IGC_CTRL_GIO_MASTER_DISABLE;
+	wr32(IGC_CTRL, ctrl);
+
+	while (timeout) {
+		if (!(rd32(IGC_STATUS) &
+		      IGC_STATUS_GIO_MASTER_ENABLE))
+			break;
+		usleep_range(2000, 3000);
+		timeout--;
+	}
+
+	if (!timeout) {
+		hw_dbg("Master requests are pending.\n");
+		ret_val = -IGC_ERR_MASTER_REQUESTS_PENDING;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_init_rx_addrs - Initialize receive addresses
+ * @hw: pointer to the HW structure
+ * @rar_count: number of receive address registers
+ *
+ * Setup the receive address registers by setting the base receive address
+ * register to the device's MAC address and clearing all the other receive
+ * address registers to 0.
+ */
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count)
+{
+	u8 mac_addr[ETH_ALEN] = {0};
+	u32 i;
+
+	/* Setup the receive address */
+	hw_dbg("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	hw_dbg("Clearing RAR[1-%u]\n", rar_count - 1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ * igc_setup_link - Setup flow control and link settings
+ * @hw: pointer to the HW structure
+ *
+ * Determines which flow control settings to use, then configures flow
+ * control. Calls the appropriate media-specific link configuration
+ * function. Assuming the adapter has a valid link partner, a valid link
+ * should be established. Assumes the hardware has previously been reset
+ * and the transmitter and receiver are not enabled.
+ */
+s32 igc_setup_link(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (igc_check_reset_block(hw))
+		goto out;
+
+	/* If requested flow control is set to default, set flow control
+	 * to its default values.
+	 */
+	if (hw->fc.requested_mode == igc_fc_default) {
+		ret_val = igc_set_default_fc(hw);
+		if (ret_val)
+			goto out;
+	}
+
+	/* We want to save off the original Flow Control configuration just
+	 * in case we get disconnected and then reconnected into a different
+	 * hub or switch with different Flow Control capabilities.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		goto out;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values. This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	hw_dbg("Initializing the Flow Control address, type and timer regs\n");
+	wr32(IGC_FCT, FLOW_CONTROL_TYPE);
+	wr32(IGC_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	wr32(IGC_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	wr32(IGC_FCTTV, hw->fc.pause_time);
+
+	ret_val = igc_set_fc_watermarks(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_set_default_fc - Set flow control default values
+ * @hw: pointer to the HW structure
+ *
+ * Set the default flow control settings: the requested mode defaults to
+ * full flow control.
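
igc_disable_pcie_master above follows a request-then-poll shape: set the GIO master disable bit in CTRL, then poll STATUS until the master-enable bit drops or roughly 80 ms of retries lapse. The sketch below models only that control flow; the variable standing in for STATUS and the point where it drains are invented for the demo, and the -10 return mirrors the value of IGC_ERR_MASTER_REQUESTS_PENDING.

#include <stdio.h>

static unsigned int status_gio_master_enable = 1;	/* requests pending */

static int poll_master_disabled(int timeout)
{
	while (timeout--) {
		if (!status_gio_master_enable)
			return 0;	/* all master requests drained */
		if (timeout == 400)	/* pretend the hardware drains here */
			status_gio_master_enable = 0;
	}
	return -10;	/* -IGC_ERR_MASTER_REQUESTS_PENDING */
}

int main(void)
{
	/* Driver equivalent: wr32(IGC_CTRL, ctrl | IGC_CTRL_GIO_MASTER_DISABLE); */
	printf("disable master: %d\n", poll_master_disabled(800));
	return 0;
}
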
+ */
+static s32 igc_set_default_fc(struct igc_hw *hw)
+{
+	hw->fc.requested_mode = igc_fc_full;
+	return 0;
+}
+
+/**
+ * igc_force_mac_fc - Force the MAC's flow control settings
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the
+ * device control register to reflect the adapter settings. TFCE and RFCE
+ * need to be explicitly set by software when a copper PHY is used because
+ * autonegotiation is managed by the PHY rather than the MAC. Software must
+ * also configure these bits when link is forced on a fiber connection.
+ */
+s32 igc_force_mac_fc(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit and
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disables flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *	0: Flow control is completely disabled
+	 *	1: Rx flow control is enabled (we can receive pause
+	 *	   frames but not send pause frames).
+	 *	2: Tx flow control is enabled (we can send pause frames
+	 *	   but we do not receive pause frames).
+	 *	3: Both Rx and Tx flow control (symmetric) is enabled.
+	 *	other: No other values should be possible at this point.
+	 */
+	hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case igc_fc_none:
+		ctrl &= (~(IGC_CTRL_TFCE | IGC_CTRL_RFCE));
+		break;
+	case igc_fc_rx_pause:
+		ctrl &= (~IGC_CTRL_TFCE);
+		ctrl |= IGC_CTRL_RFCE;
+		break;
+	case igc_fc_tx_pause:
+		ctrl &= (~IGC_CTRL_RFCE);
+		ctrl |= IGC_CTRL_TFCE;
+		break;
+	case igc_fc_full:
+		ctrl |= (IGC_CTRL_TFCE | IGC_CTRL_RFCE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		ret_val = -IGC_ERR_CONFIG;
+		goto out;
+	}
+
+	wr32(IGC_CTRL, ctrl);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_set_fc_watermarks - Set flow control high/low watermarks
+ * @hw: pointer to the HW structure
+ *
+ * Sets the flow control high/low threshold (watermark) registers. If
+ * flow control XON frame transmission is enabled, then set XON frame
+ * transmission as well.
+ */
+static s32 igc_set_fc_watermarks(struct igc_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	/* Set the flow control receive threshold registers. Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code. However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & igc_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= IGC_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	wr32(IGC_FCRTL, fcrtl);
+	wr32(IGC_FCRTH, fcrth);
+
+	return 0;
+}
+
+/**
+ * igc_clear_hw_cntrs_base - Clear base hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the base hardware counters by reading the counter registers.
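
The switch in igc_force_mac_fc above reduces to a four-entry mapping from flow-control mode to the TFCE/RFCE bit pair OR'd into CTRL. As an alternative sketch of the same mapping (not how the driver writes it), a lookup table keeps the read-modify-write of the control register explicit.

#include <stdint.h>
#include <stdio.h>

#define CTRL_RFCE 0x08000000u	/* receive flow control enable */
#define CTRL_TFCE 0x10000000u	/* transmit flow control enable */

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static const uint32_t fc_bits[] = {
	[FC_NONE]     = 0,
	[FC_RX_PAUSE] = CTRL_RFCE,
	[FC_TX_PAUSE] = CTRL_TFCE,
	[FC_FULL]     = CTRL_RFCE | CTRL_TFCE,
};

int main(void)
{
	/* Read-modify-write: clear both bits, then set the pair for the mode. */
	uint32_t ctrl = 0xDEAD0000u & ~(CTRL_RFCE | CTRL_TFCE);

	ctrl |= fc_bits[FC_FULL];
	printf("CTRL=0x%08X\n", (unsigned)ctrl);
	return 0;
}
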
+ */
+void igc_clear_hw_cntrs_base(struct igc_hw *hw)
+{
+	rd32(IGC_CRCERRS);
+	rd32(IGC_SYMERRS);
+	rd32(IGC_MPC);
+	rd32(IGC_SCC);
+	rd32(IGC_ECOL);
+	rd32(IGC_MCC);
+	rd32(IGC_LATECOL);
+	rd32(IGC_COLC);
+	rd32(IGC_DC);
+	rd32(IGC_SEC);
+	rd32(IGC_RLEC);
+	rd32(IGC_XONRXC);
+	rd32(IGC_XONTXC);
+	rd32(IGC_XOFFRXC);
+	rd32(IGC_XOFFTXC);
+	rd32(IGC_FCRUC);
+	rd32(IGC_GPRC);
+	rd32(IGC_BPRC);
+	rd32(IGC_MPRC);
+	rd32(IGC_GPTC);
+	rd32(IGC_GORCL);
+	rd32(IGC_GORCH);
+	rd32(IGC_GOTCL);
+	rd32(IGC_GOTCH);
+	rd32(IGC_RNBC);
+	rd32(IGC_RUC);
+	rd32(IGC_RFC);
+	rd32(IGC_ROC);
+	rd32(IGC_RJC);
+	rd32(IGC_TORL);
+	rd32(IGC_TORH);
+	rd32(IGC_TOTL);
+	rd32(IGC_TOTH);
+	rd32(IGC_TPR);
+	rd32(IGC_TPT);
+	rd32(IGC_MPTC);
+	rd32(IGC_BPTC);
+
+	rd32(IGC_PRC64);
+	rd32(IGC_PRC127);
+	rd32(IGC_PRC255);
+	rd32(IGC_PRC511);
+	rd32(IGC_PRC1023);
+	rd32(IGC_PRC1522);
+	rd32(IGC_PTC64);
+	rd32(IGC_PTC127);
+	rd32(IGC_PTC255);
+	rd32(IGC_PTC511);
+	rd32(IGC_PTC1023);
+	rd32(IGC_PTC1522);
+
+	rd32(IGC_ALGNERRC);
+	rd32(IGC_RXERRC);
+	rd32(IGC_TNCRS);
+	rd32(IGC_CEXTERR);
+	rd32(IGC_TSCTC);
+	rd32(IGC_TSCTFC);
+
+	rd32(IGC_MGTPRC);
+	rd32(IGC_MGTPDC);
+	rd32(IGC_MGTPTC);
+
+	rd32(IGC_IAC);
+	rd32(IGC_ICRXOC);
+
+	rd32(IGC_ICRXPTC);
+	rd32(IGC_ICRXATC);
+	rd32(IGC_ICTXPTC);
+	rd32(IGC_ICTXATC);
+	rd32(IGC_ICTXQEC);
+	rd32(IGC_ICTXQMTC);
+	rd32(IGC_ICRXDMTC);
+
+	rd32(IGC_CBTMPC);
+	rd32(IGC_HTDPMC);
+	rd32(IGC_CBRMPC);
+	rd32(IGC_RPTHC);
+	rd32(IGC_HGPTC);
+	rd32(IGC_HTCBDPC);
+	rd32(IGC_HGORCL);
+	rd32(IGC_HGORCH);
+	rd32(IGC_HGOTCL);
+	rd32(IGC_HGOTCH);
+	rd32(IGC_LENERRS);
+}
+
+/**
+ * igc_rar_set - Set receive address register
+ * @hw: pointer to the HW structure
+ * @addr: pointer to the receive address
+ * @index: receive address array register
+ *
+ * Sets the receive address array register at index to the address passed
+ * in by addr.
+ */
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= IGC_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	wr32(IGC_RAL(index), rar_low);
+	wrfl();
+	wr32(IGC_RAH(index), rar_high);
+	wrfl();
+}
+
+/**
+ * igc_check_for_copper_link - Check for link (Copper)
+ * @hw: pointer to the HW structure
+ *
+ * Checks to see if the link status of the hardware has changed. If a
+ * change in link status has been detected, then we read the PHY registers
+ * to get the current speed/duplex if link exists.
+ */
+s32 igc_check_for_copper_link(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	bool link;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed. The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status) {
+		ret_val = 0;
+		goto out;
+	}
+
+	/* First we want to see if the MII Status Register reports
+	 * link. If so, then we want to get the current speed/duplex
+	 * of the PHY.
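
igc_rar_set above packs the six address bytes little-endian into two registers: RAL gets bytes 0 through 3, the low half of RAH gets bytes 4 and 5, and the Address Valid bit is set only for a non-zero address. The packing is pure arithmetic and can be checked in isolation; the MAC address below is arbitrary demo data.

#include <stdint.h>
#include <stdio.h>

#define RAH_AV 0x80000000u	/* Address Valid bit */

int main(void)
{
	uint8_t addr[6] = { 0x00, 0x1B, 0x21, 0xAA, 0xBB, 0xCC };
	uint32_t ral, rah;

	ral = (uint32_t)addr[0] | ((uint32_t)addr[1] << 8) |
	      ((uint32_t)addr[2] << 16) | ((uint32_t)addr[3] << 24);
	rah = (uint32_t)addr[4] | ((uint32_t)addr[5] << 8);

	if (ral || rah)		/* all-zero address stays invalid */
		rah |= RAH_AV;

	printf("RAL=0x%08X RAH=0x%08X\n", (unsigned)ral, (unsigned)rah);
	return 0;
}
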
+ */ + ret_val = igc_phy_has_link(hw, 1, 0, &link); + if (ret_val) + goto out; + + if (!link) + goto out; /* No link detected */ + + mac->get_link_status = false; + + /* Check if there was DownShift, must be checked + * immediately after link-up + */ + igc_check_downshift(hw); + + /* If we are forcing speed/duplex, then we simply return since + * we have already determined whether we have link or not. + */ + if (!mac->autoneg) { + ret_val = -IGC_ERR_CONFIG; + goto out; + } + + /* Auto-Neg is enabled. Auto Speed Detection takes care + * of MAC speed/duplex configuration. So we only need to + * configure Collision Distance in the MAC. + */ + igc_config_collision_dist(hw); + + /* Configure Flow Control now that Auto-Neg has completed. + * First, we need to restore the desired flow control + * settings because we may have had to re-autoneg with a + * different link partner. + */ + ret_val = igc_config_fc_after_link_up(hw); + if (ret_val) + hw_dbg("Error configuring flow control\n"); + +out: + return ret_val; +} + +/** + * igc_config_collision_dist - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + */ +void igc_config_collision_dist(struct igc_hw *hw) +{ + u32 tctl; + + tctl = rd32(IGC_TCTL); + + tctl &= ~IGC_TCTL_COLD; + tctl |= IGC_COLLISION_DISTANCE << IGC_COLD_SHIFT; + + wr32(IGC_TCTL, tctl); + wrfl(); +} + +/** + * igc_config_fc_after_link_up - Configures flow control after link + * @hw: pointer to the HW structure + * + * Checks the status of auto-negotiation after link up to ensure that the + * speed and duplex were not forced. If the link needed to be forced, then + * flow control needs to be forced also. If auto-negotiation is enabled + * and did not fail, then we configure flow control based on our link + * partner. + */ +s32 igc_config_fc_after_link_up(struct igc_hw *hw) +{ + u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; + struct igc_mac_info *mac = &hw->mac; + u16 speed, duplex; + s32 ret_val = 0; + + /* Check for the case where we have fiber media and auto-neg failed + * so we had to force link. In this case, we need to force the + * configuration of the MAC to match the "fc" parameter. + */ + if (mac->autoneg_failed) { + if (hw->phy.media_type == igc_media_type_copper) + ret_val = igc_force_mac_fc(hw); + } + + if (ret_val) { + hw_dbg("Error forcing flow control settings\n"); + goto out; + } + + /* Check for the case where we have copper media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner has + * flow control configured. + */ + if (hw->phy.media_type == igc_media_type_copper && mac->autoneg) { + /* Read the MII Status Register and check to see if AutoNeg + * has completed. We read this twice because this reg has + * some "sticky" (latched) bits. 
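
The comment above about reading PHY_STATUS twice is worth making concrete: the link bit latches low on any link loss, so the first read reports the latched history and only the second reports live state. The sketch below fakes two successive MDIO reads with an array to show why a single read can claim link-down on a healthy link.

#include <stdint.h>
#include <stdio.h>

#define MII_SR_LINK_STATUS 0x0004	/* Link Status, 1 = link */

int main(void)
{
	/* First read returns the stale latched value; the second is live. */
	uint16_t reads[2] = { 0x0000, MII_SR_LINK_STATUS };
	uint16_t stale = reads[0];
	uint16_t live = reads[1];

	printf("latched=%d live=%d\n",
	       !!(stale & MII_SR_LINK_STATUS),
	       !!(live & MII_SR_LINK_STATUS));
	return 0;
}
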
+ */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &mii_status_reg); + if (ret_val) + goto out; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &mii_status_reg); + if (ret_val) + goto out; + + if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { + hw_dbg("Copper PHY and Auto Neg has not completed.\n"); + goto out; + } + + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (Address 4) and the Auto_Negotiation Base + * Page Ability Register (Address 5) to determine how + * flow control was negotiated. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, + &mii_nway_adv_reg); + if (ret_val) + goto out; + ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, + &mii_nway_lp_ability_reg); + if (ret_val) + goto out; + /* Two bits in the Auto Negotiation Advertisement Register + * (Address 4) and two bits in the Auto Negotiation Base + * Page Ability Register (Address 5) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | igc_fc_none + * 0 | 1 | 0 | DC | igc_fc_none + * 0 | 1 | 1 | 0 | igc_fc_none + * 0 | 1 | 1 | 1 | igc_fc_tx_pause + * 1 | 0 | 0 | DC | igc_fc_none + * 1 | DC | 1 | DC | igc_fc_full + * 1 | 1 | 0 | 0 | igc_fc_none + * 1 | 1 | 0 | 1 | igc_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | IGC_fc_full + * + */ + if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { + /* Now we need to check if the user selected RX ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise RX + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == igc_fc_full) { + hw->fc.current_mode = igc_fc_full; + hw_dbg("Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = igc_fc_rx_pause; + hw_dbg("Flow Control = RX PAUSE frames only.\n"); + } + } + + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | igc_fc_tx_pause + */ + else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = igc_fc_tx_pause; + hw_dbg("Flow Control = TX PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. 
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | igc_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+		/* Per the IEEE spec, at this point flow control should be
+		 * disabled. However, we want to consider that we could
+		 * be connected to a legacy switch that doesn't advertise
+		 * desired flow control, but can be forced on the link
+		 * partner. So if we advertised no flow control, that is
+		 * what we will resolve to. If we advertised some kind of
+		 * receive capability (Rx Pause Only or Full Flow Control)
+		 * and the link partner advertised none, we will configure
+		 * ourselves to enable Rx Flow Control only. We can do
+		 * this safely for two reasons: If the link partner really
+		 * didn't want flow control enabled, and we enable Rx, no
+		 * harm done since we won't be receiving any PAUSE frames
+		 * anyway. If the intent on the link partner was to have
+		 * flow control enabled, then by us enabling RX only, we
+		 * can at least receive pause frames and process them.
+		 * This is a good idea because in most cases, since we are
+		 * predominantly a server NIC, more times than not we will
+		 * be asked to delay transmission of packets than asking
+		 * our link partner to pause transmission of frames.
+		 */
+		else if ((hw->fc.requested_mode == igc_fc_none) ||
+			 (hw->fc.requested_mode == igc_fc_tx_pause) ||
+			 (hw->fc.strict_ieee)) {
+			hw->fc.current_mode = igc_fc_none;
+			hw_dbg("Flow Control = NONE.\n");
+		} else {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+
+		/* Now we need to do one last check... If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+		if (ret_val) {
+			hw_dbg("Error getting link speed and duplex\n");
+			goto out;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = igc_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = igc_force_mac_fc(hw);
+		if (ret_val) {
+			hw_dbg("Error forcing flow control settings\n");
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_auto_rd_done - Check for auto read completion
+ * @hw: pointer to the HW structure
+ *
+ * Check EEPROM for Auto Read done bit.
+ */
+s32 igc_get_auto_rd_done(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	s32 i = 0;
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (rd32(IGC_EECD) & IGC_EECD_AUTO_RD)
+			break;
+		usleep_range(1000, 2000);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		hw_dbg("Auto read by HW from NVM has not completed.\n");
+		ret_val = -IGC_ERR_RESET;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Read the status register for the current speed/duplex and store the current
+ * speed and duplex for copper connections.
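
The PAUSE/ASM_DIR tables above fold into a small pure function from the two advertisement registers to a negotiated mode, which is easier to unit test than the in-line chain. The sketch below mirrors the if/else structure of the driver code; it deliberately omits the later demotion of full to Rx-only when the requested mode was not full, and the half-duplex override.

#include <stdio.h>

#define AR_PAUSE   0x0400	/* PAUSE capability advertised */
#define AR_ASM_DIR 0x0800	/* asymmetric pause direction */

enum fc { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static enum fc resolve_fc(unsigned int local, unsigned int partner)
{
	if ((local & AR_PAUSE) && (partner & AR_PAUSE))
		return FC_FULL;		/* both ends symmetric-capable */
	if (!(local & AR_PAUSE) && (local & AR_ASM_DIR) &&
	    (partner & AR_PAUSE) && (partner & AR_ASM_DIR))
		return FC_TX_PAUSE;	/* we send PAUSE, partner receives */
	if ((local & AR_PAUSE) && (local & AR_ASM_DIR) &&
	    !(partner & AR_PAUSE) && (partner & AR_ASM_DIR))
		return FC_RX_PAUSE;	/* partner sends PAUSE, we receive */
	return FC_NONE;
}

int main(void)
{
	printf("full=%d tx=%d\n",
	       resolve_fc(AR_PAUSE, AR_PAUSE),
	       resolve_fc(AR_ASM_DIR, AR_PAUSE | AR_ASM_DIR));
	return 0;
}
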
+ */ +s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed, + u16 *duplex) +{ + u32 status; + + status = rd32(IGC_STATUS); + if (status & IGC_STATUS_SPEED_1000) { + /* For I225, STATUS will indicate 1G speed in both 1 Gbps + * and 2.5 Gbps link modes. An additional bit is used + * to differentiate between 1 Gbps and 2.5 Gbps. + */ + if (hw->mac.type == igc_i225 && + (status & IGC_STATUS_SPEED_2500)) { + *speed = SPEED_2500; + hw_dbg("2500 Mbs, "); + } else { + *speed = SPEED_1000; + hw_dbg("1000 Mbs, "); + } + } else if (status & IGC_STATUS_SPEED_100) { + *speed = SPEED_100; + hw_dbg("100 Mbs, "); + } else { + *speed = SPEED_10; + hw_dbg("10 Mbs, "); + } + + if (status & IGC_STATUS_FD) { + *duplex = FULL_DUPLEX; + hw_dbg("Full Duplex\n"); + } else { + *duplex = HALF_DUPLEX; + hw_dbg("Half Duplex\n"); + } + + return 0; +} + +/** + * igc_put_hw_semaphore - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + */ +void igc_put_hw_semaphore(struct igc_hw *hw) +{ + u32 swsm; + + swsm = rd32(IGC_SWSM); + + swsm &= ~(IGC_SWSM_SMBI | IGC_SWSM_SWESMBI); + + wr32(IGC_SWSM, swsm); +} + +/** + * igc_enable_mng_pass_thru - Enable processing of ARP's + * @hw: pointer to the HW structure + * + * Verifies the hardware needs to leave interface enabled so that frames can + * be directed to and from the management interface. + */ +bool igc_enable_mng_pass_thru(struct igc_hw *hw) +{ + bool ret_val = false; + u32 fwsm, factps; + u32 manc; + + if (!hw->mac.asf_firmware_present) + goto out; + + manc = rd32(IGC_MANC); + + if (!(manc & IGC_MANC_RCV_TCO_EN)) + goto out; + + if (hw->mac.arc_subsystem_valid) { + fwsm = rd32(IGC_FWSM); + factps = rd32(IGC_FACTPS); + + if (!(factps & IGC_FACTPS_MNGCG) && + ((fwsm & IGC_FWSM_MODE_MASK) == + (igc_mng_mode_pt << IGC_FWSM_MODE_SHIFT))) { + ret_val = true; + goto out; + } + } else { + if ((manc & IGC_MANC_SMBUS_EN) && + !(manc & IGC_MANC_ASF_EN)) { + ret_val = true; + goto out; + } + } + +out: + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h new file mode 100644 index 000000000000..782bc995badc --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_mac.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_MAC_H_ +#define _IGC_MAC_H_ + +#include "igc_hw.h" +#include "igc_phy.h" +#include "igc_defines.h" + +#ifndef IGC_REMOVED +#define IGC_REMOVED(a) (0) +#endif /* IGC_REMOVED */ + +/* forward declaration */ +s32 igc_disable_pcie_master(struct igc_hw *hw); +s32 igc_check_for_copper_link(struct igc_hw *hw); +s32 igc_config_fc_after_link_up(struct igc_hw *hw); +s32 igc_force_mac_fc(struct igc_hw *hw); +void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count); +s32 igc_setup_link(struct igc_hw *hw); +void igc_clear_hw_cntrs_base(struct igc_hw *hw); +s32 igc_get_auto_rd_done(struct igc_hw *hw); +void igc_put_hw_semaphore(struct igc_hw *hw); +void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index); +void igc_config_collision_dist(struct igc_hw *hw); + +s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed, + u16 *duplex); + +bool igc_enable_mng_pass_thru(struct igc_hw *hw); + +enum igc_mng_mode { + igc_mng_mode_none = 0, + igc_mng_mode_asf, + igc_mng_mode_pt, + igc_mng_mode_ipmi, + igc_mng_mode_host_if_only +}; + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c new file mode 100644 index 
000000000000..9d85707e8a81 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -0,0 +1,3901 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/if_vlan.h> +#include <linux/aer.h> + +#include "igc.h" +#include "igc_hw.h" + +#define DRV_VERSION "0.0.1-k" +#define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" + +static int debug = -1; + +MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); +MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +char igc_driver_name[] = "igc"; +char igc_driver_version[] = DRV_VERSION; +static const char igc_driver_string[] = DRV_SUMMARY; +static const char igc_copyright[] = + "Copyright(c) 2018 Intel Corporation."; + +static const struct igc_info *igc_info_tbl[] = { + [board_base] = &igc_base_info, +}; + +static const struct pci_device_id igc_pci_tbl[] = { + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, + /* required last entry */ + {0, } +}; + +MODULE_DEVICE_TABLE(pci, igc_pci_tbl); + +/* forward declaration */ +static void igc_clean_tx_ring(struct igc_ring *tx_ring); +static int igc_sw_init(struct igc_adapter *); +static void igc_configure(struct igc_adapter *adapter); +static void igc_power_down_link(struct igc_adapter *adapter); +static void igc_set_default_mac_filter(struct igc_adapter *adapter); +static void igc_set_rx_mode(struct net_device *netdev); +static void igc_write_itr(struct igc_q_vector *q_vector); +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector); +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx); +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix); +static void igc_free_q_vectors(struct igc_adapter *adapter); +static void igc_irq_disable(struct igc_adapter *adapter); +static void igc_irq_enable(struct igc_adapter *adapter); +static void igc_configure_msix(struct igc_adapter *adapter); +static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *bi); + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +static void igc_reset(struct igc_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + + hw->mac.ops.reset_hw(hw); + + if (hw->mac.ops.init_hw(hw)) + dev_err(&pdev->dev, "Hardware Error\n"); + + if (!netif_running(adapter->netdev)) + igc_power_down_link(adapter); + + igc_get_phy_info(hw); +} + +/** + * igc_power_up_link - Power up the phy/serdes link + * @adapter: address of board private structure + */ +static void igc_power_up_link(struct igc_adapter *adapter) +{ + igc_reset_phy(&adapter->hw); + + if (adapter->hw.phy.media_type == igc_media_type_copper) + igc_power_up_phy_copper(&adapter->hw); + + igc_setup_link(&adapter->hw); +} + +/** + * igc_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure + */ +static void igc_power_down_link(struct igc_adapter *adapter) +{ + if (adapter->hw.phy.media_type == igc_media_type_copper) + igc_power_down_phy_copper_base(&adapter->hw); +} + +/** + * igc_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure + * + * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 
+ * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + */ +static void igc_release_hw_control(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware take over control of h/w */ + ctrl_ext = rd32(IGC_CTRL_EXT); + wr32(IGC_CTRL_EXT, + ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); +} + +/** + * igc_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure + * + * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. + */ +static void igc_get_hw_control(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = rd32(IGC_CTRL_EXT); + wr32(IGC_CTRL_EXT, + ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); +} + +/** + * igc_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + */ +static void igc_free_tx_resources(struct igc_ring *tx_ring) +{ + igc_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + dma_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * igc_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void igc_free_all_tx_resources(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igc_free_tx_resources(adapter->tx_ring[i]); +} + +/** + * igc_clean_tx_ring - Free Tx Buffers + * @tx_ring: ring to be cleaned + */ +static void igc_clean_tx_ring(struct igc_ring *tx_ring) +{ + u16 i = tx_ring->next_to_clean; + struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; + + while (i != tx_ring->next_to_use) { + union igc_adv_tx_desc *eop_desc, *tx_desc; + + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* check for eop_desc to determine the end of the packet */ + eop_desc = tx_buffer->next_to_watch; + tx_desc = IGC_TX_DESC(tx_ring, i); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGC_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + } + } + + /* reset BQL for queue */ + netdev_tx_reset_queue(txring_txq(tx_ring)); + + /* reset next_to_use and next_to_clean */ + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; +} + +/** + * igc_clean_all_tx_rings - Free Tx Buffers for all queues + * @adapter: board private structure + */ +static void igc_clean_all_tx_rings(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + if (adapter->tx_ring[i]) + igc_clean_tx_ring(adapter->tx_ring[i]); +} + +/** + * 
igc_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup + * + * Return 0 on success, negative on failure + */ +static int igc_setup_tx_resources(struct igc_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int size = 0; + + size = sizeof(struct igc_tx_buffer) * tx_ring->count; + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + + if (!tx_ring->desc) + goto err; + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + + return 0; + +err: + vfree(tx_ring->tx_buffer_info); + dev_err(dev, + "Unable to allocate memory for the transmit descriptor ring\n"); + return -ENOMEM; +} + +/** + * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int igc_setup_all_tx_resources(struct igc_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + err = igc_setup_tx_resources(adapter->tx_ring[i]); + if (err) { + dev_err(&pdev->dev, + "Allocation for Tx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igc_free_tx_resources(adapter->tx_ring[i]); + break; + } + } + + return err; +} + +/** + * igc_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from + */ +static void igc_clean_rx_ring(struct igc_ring *rx_ring) +{ + u16 i = rx_ring->next_to_clean; + + if (rx_ring->skb) + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + + /* Free all the Rx ring sk_buffs */ + while (i != rx_ring->next_to_alloc) { + struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; + + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. 
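+ * The sync below is bounded to this buffer's page_offset and + * igc_rx_bufsz() window, so only the region the device may have + * written is brought back for the CPU before the page is unmapped.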
+ */ + dma_sync_single_range_for_cpu(rx_ring->dev, + buffer_info->dma, + buffer_info->page_offset, + igc_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, + buffer_info->dma, + igc_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + __page_frag_cache_drain(buffer_info->page, + buffer_info->pagecnt_bias); + + i++; + if (i == rx_ring->count) + i = 0; + } + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * igc_clean_all_rx_rings - Free Rx Buffers for all queues + * @adapter: board private structure + */ +static void igc_clean_all_rx_rings(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + if (adapter->rx_ring[i]) + igc_clean_rx_ring(adapter->rx_ring[i]); +} + +/** + * igc_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + */ +static void igc_free_rx_resources(struct igc_ring *rx_ring) +{ + igc_clean_rx_ring(rx_ring); + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!rx_ring->desc) + return; + + dma_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + + rx_ring->desc = NULL; +} + +/** + * igc_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + */ +static void igc_free_all_rx_resources(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + igc_free_rx_resources(adapter->rx_ring[i]); +} + +/** + * igc_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + */ +static int igc_setup_rx_resources(struct igc_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int size, desc_len; + + size = sizeof(struct igc_rx_buffer) * rx_ring->count; + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + goto err; + + desc_len = sizeof(union igc_adv_rx_desc); + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = ALIGN(rx_ring->size, 4096); + + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + + if (!rx_ring->desc) + goto err; + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + + return 0; + +err: + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + dev_err(dev, + "Unable to allocate memory for the receive descriptor ring\n"); + return -ENOMEM; +} + +/** + * igc_setup_all_rx_resources - wrapper to allocate Rx resources + * (Descriptors) for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int igc_setup_all_rx_resources(struct igc_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_rx_queues; i++) { + err = igc_setup_rx_resources(adapter->rx_ring[i]); + if (err) { + dev_err(&pdev->dev, + "Allocation for Rx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igc_free_rx_resources(adapter->rx_ring[i]); + break; + } + } + + return err; +} + +/** + * igc_configure_rx_ring - Configure a receive ring after Reset + * @adapter: board private structure + * @ring: receive ring to be configured + * + * Configure the Rx unit of the MAC after a reset. 
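+ * As a worked example, a ring of 256 descriptors of 16 bytes each + * programs RDLEN to 4096, so the descriptor ring occupies exactly one + * 4K page; head (RDH) and tail (RDT) both start at zero.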
+ */ +static void igc_configure_rx_ring(struct igc_adapter *adapter, + struct igc_ring *ring) +{ + struct igc_hw *hw = &adapter->hw; + union igc_adv_rx_desc *rx_desc; + int reg_idx = ring->reg_idx; + u32 srrctl = 0, rxdctl = 0; + u64 rdba = ring->dma; + + /* disable the queue */ + wr32(IGC_RXDCTL(reg_idx), 0); + + /* Set DMA base address registers */ + wr32(IGC_RDBAL(reg_idx), + rdba & 0x00000000ffffffffULL); + wr32(IGC_RDBAH(reg_idx), rdba >> 32); + wr32(IGC_RDLEN(reg_idx), + ring->count * sizeof(union igc_adv_rx_desc)); + + /* initialize head and tail */ + ring->tail = adapter->io_addr + IGC_RDT(reg_idx); + wr32(IGC_RDH(reg_idx), 0); + writel(0, ring->tail); + + /* reset next-to-use/clean to place SW in sync with hardware */ + ring->next_to_clean = 0; + ring->next_to_use = 0; + + /* set descriptor configuration */ + srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; + + wr32(IGC_SRRCTL(reg_idx), srrctl); + + rxdctl |= IGC_RX_PTHRESH; + rxdctl |= IGC_RX_HTHRESH << 8; + rxdctl |= IGC_RX_WTHRESH << 16; + + /* initialize rx_buffer_info */ + memset(ring->rx_buffer_info, 0, + sizeof(struct igc_rx_buffer) * ring->count); + + /* initialize Rx descriptor 0 */ + rx_desc = IGC_RX_DESC(ring, 0); + rx_desc->wb.upper.length = 0; + + /* enable receive descriptor fetching */ + rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; + + wr32(IGC_RXDCTL(reg_idx), rxdctl); +} + +/** + * igc_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + */ +static void igc_configure_rx(struct igc_adapter *adapter) +{ + int i; + + /* Setup the HW Rx Head and Tail Descriptor Pointers and + * the Base and Length of the Rx Descriptor Ring + */ + for (i = 0; i < adapter->num_rx_queues; i++) + igc_configure_rx_ring(adapter, adapter->rx_ring[i]); +} + +/** + * igc_configure_tx_ring - Configure transmit ring after Reset + * @adapter: board private structure + * @ring: tx ring to configure + * + * Configure a transmit ring after a reset. + */ +static void igc_configure_tx_ring(struct igc_adapter *adapter, + struct igc_ring *ring) +{ + struct igc_hw *hw = &adapter->hw; + int reg_idx = ring->reg_idx; + u64 tdba = ring->dma; + u32 txdctl = 0; + + /* disable the queue */ + wr32(IGC_TXDCTL(reg_idx), 0); + wrfl(); + mdelay(10); + + wr32(IGC_TDLEN(reg_idx), + ring->count * sizeof(union igc_adv_tx_desc)); + wr32(IGC_TDBAL(reg_idx), + tdba & 0x00000000ffffffffULL); + wr32(IGC_TDBAH(reg_idx), tdba >> 32); + + ring->tail = adapter->io_addr + IGC_TDT(reg_idx); + wr32(IGC_TDH(reg_idx), 0); + writel(0, ring->tail); + + txdctl |= IGC_TX_PTHRESH; + txdctl |= IGC_TX_HTHRESH << 8; + txdctl |= IGC_TX_WTHRESH << 16; + + txdctl |= IGC_TXDCTL_QUEUE_ENABLE; + wr32(IGC_TXDCTL(reg_idx), txdctl); +} + +/** + * igc_configure_tx - Configure transmit Unit after Reset + * @adapter: board private structure + * + * Configure the Tx unit of the MAC after a reset.
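+ * Each ring is disabled, has its base address and length programmed, + * and gets its prefetch/host/write-back thresholds packed into TXDCTL + * at bit offsets 0, 8 and 16 by igc_configure_tx_ring() above.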
+ */ +static void igc_configure_tx(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igc_configure_tx_ring(adapter, adapter->tx_ring[i]); +} + +/** + * igc_setup_mrqc - configure the multiple receive queue control registers + * @adapter: Board private structure + */ +static void igc_setup_mrqc(struct igc_adapter *adapter) +{ +} + +/** + * igc_setup_rctl - configure the receive control registers + * @adapter: Board private structure + */ +static void igc_setup_rctl(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 rctl; + + rctl = rd32(IGC_RCTL); + + rctl &= ~(3 << IGC_RCTL_MO_SHIFT); + rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC); + + rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); + + /* enable stripping of CRC. Newer features require + * that the HW strips the CRC. + */ + rctl |= IGC_RCTL_SECRC; + + /* disable store bad packets and clear size bits. */ + rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256); + + /* enable LPE to allow for reception of jumbo frames */ + rctl |= IGC_RCTL_LPE; + + /* disable queue 0 to prevent tail write w/o re-config */ + wr32(IGC_RXDCTL(0), 0); + + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in set_rx_mode + */ + rctl |= (IGC_RCTL_SBP | /* Receive bad packets */ + IGC_RCTL_BAM | /* RX All Bcast Pkts */ + IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ + + rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */ + IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */ + } + + wr32(IGC_RCTL, rctl); +} + +/** + * igc_setup_tctl - configure the transmit control registers + * @adapter: Board private structure + */ +static void igc_setup_tctl(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 tctl; + + /* disable queue 0 which could be enabled by default */ + wr32(IGC_TXDCTL(0), 0); + + /* Program the Transmit Control Register */ + tctl = rd32(IGC_TCTL); + tctl &= ~IGC_TCTL_CT; + tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC | + (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT); + + /* Enable transmits */ + tctl |= IGC_TCTL_EN; + + wr32(IGC_TCTL, tctl); +} + +/** + * igc_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int igc_set_mac(struct net_device *netdev, void *p) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + /* set the correct pool for the new PF MAC address in entry 0 */ + igc_set_default_mac_filter(adapter); + + return 0; +} + +static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) +{ +} + +static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) +{ + struct net_device *netdev = tx_ring->netdev; + + netif_stop_subqueue(netdev, tx_ring->queue_index); + + /* memory barrier: make the queue stop visible before we re-read + * the free descriptor count (pairs with the smp_mb() in + * igc_clean_tx_irq()) + */ + smp_mb(); + + /* We need to check again in case another CPU has just + * made room available. + */ + if (igc_desc_unused(tx_ring) < size) + return -EBUSY; + + /* A reprieve!
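+ * Descriptors were freed between the check above and the stop, so + * restart the queue instead of leaving it stalled.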
*/ + netif_wake_subqueue(netdev, tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp2); + tx_ring->tx_stats.restart_queue2++; + u64_stats_update_end(&tx_ring->tx_syncp2); + + return 0; +} + +static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) +{ + if (igc_desc_unused(tx_ring) >= size) + return 0; + return __igc_maybe_stop_tx(tx_ring, size); +} + +static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + u32 cmd_type = IGC_ADVTXD_DTYP_DATA | + IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS; + + return cmd_type; +} + +static void igc_tx_olinfo_status(struct igc_ring *tx_ring, + union igc_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) +{ + u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; + + /* insert L4 checksum */ + olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * + ((IGC_TXD_POPTS_TXSM << 8) / + IGC_TX_FLAGS_CSUM); + + /* insert IPv4 checksum */ + olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * + (((IGC_TXD_POPTS_IXSM << 8)) / + IGC_TX_FLAGS_IPV4); + + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); +} + +static int igc_tx_map(struct igc_ring *tx_ring, + struct igc_tx_buffer *first, + const u8 hdr_len) +{ + struct sk_buff *skb = first->skb; + struct igc_tx_buffer *tx_buffer; + union igc_adv_tx_desc *tx_desc; + u32 tx_flags = first->tx_flags; + struct skb_frag_struct *frag; + u16 i = tx_ring->next_to_use; + unsigned int data_len, size; + dma_addr_t dma; + u32 cmd_type = igc_tx_cmd_type(skb, tx_flags); + + tx_desc = IGC_TX_DESC(tx_ring, i); + + igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buffer = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGC_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + dma += IGC_MAX_DATA_PER_TXD; + size -= IGC_MAX_DATA_PER_TXD; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGC_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, + size, DMA_TO_DEVICE); + + tx_buffer = &tx_ring->tx_buffer_info[i]; + } + + /* write last descriptor with RS and EOP bits */ + cmd_type |= size | IGC_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + + /* set the timestamp */ + first->time_stamp = jiffies; + + /* Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. 
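+ * e.g. on a weakly ordered architecture the descriptor writes above + * could otherwise be reordered past the next_to_watch store that the + * clean-up path keys off.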
+ */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + /* Make sure there is space in the ring for the next send. */ + igc_maybe_stop_tx(tx_ring, DESC_NEEDED); + + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } + + return 0; +dma_error: + dev_err(tx_ring->dev, "TX DMA map failed\n"); + tx_buffer = &tx_ring->tx_buffer_info[i]; + + /* clear dma mappings for failed tx_buffer_info map */ + while (tx_buffer != first) { + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + if (i-- == 0) + i += tx_ring->count; + tx_buffer = &tx_ring->tx_buffer_info[i]; + } + + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + dev_kfree_skb_any(tx_buffer->skb); + tx_buffer->skb = NULL; + + tx_ring->next_to_use = i; + + return -1; +} + +static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + struct igc_ring *tx_ring) +{ + u16 count = TXD_USE_COUNT(skb_headlen(skb)); + __be16 protocol = vlan_get_protocol(skb); + struct igc_tx_buffer *first; + u32 tx_flags = 0; + unsigned short f; + u8 hdr_len = 0; + + /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, + * + 2 desc gap to keep tail from touching head, + * + 1 desc for context descriptor, + * otherwise try next time + */ + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + + if (igc_maybe_stop_tx(tx_ring, count + 3)) { + /* this is a hard error */ + return NETDEV_TX_BUSY; + } + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + + skb_tx_timestamp(skb); + + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = protocol; + + igc_tx_csum(tx_ring, first); + + igc_tx_map(tx_ring, first, hdr_len); + + return NETDEV_TX_OK; +} + +static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, + struct sk_buff *skb) +{ + unsigned int r_idx = skb->queue_mapping; + + if (r_idx >= adapter->num_tx_queues) + r_idx = r_idx % adapter->num_tx_queues; + + return adapter->tx_ring[r_idx]; +} + +static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + /* The minimum packet size with TCTL.PSP set is 17 so pad the skb + * in order to meet this minimum size requirement. 
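+ * e.g. a 10-byte frame is zero-padded to 17 bytes by skb_padto() + * below; skb->len is then set to 17 by hand, since skb_padto() pads + * the buffer but does not grow the recorded length itself.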
+ */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); +} + +static inline void igc_rx_hash(struct igc_ring *ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (ring->netdev->features & NETIF_F_RXHASH) + skb_set_hash(skb, + le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); +} + +/** + * igc_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. + */ +static void igc_process_skb_fields(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + igc_rx_hash(rx_ring, rx_desc, skb); + + skb_record_rx_queue(skb, rx_ring->queue_index); + + skb->protocol = eth_type_trans(skb, rx_ring->netdev); +} + +static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, + const unsigned int size) +{ + struct igc_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +/** + * igc_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @skb: sk_buff to place the data into + * @size: size of buffer to be added + * + * This function will add the data contained in rx_buffer->page to the skb. + */ +static void igc_add_rx_frag(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer, + struct sk_buff *skb, + unsigned int size) +{ +#if (PAGE_SIZE < 8192) + unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); + rx_buffer->page_offset ^= truesize; +#else + unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) : + SKB_DATA_ALIGN(size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); + rx_buffer->page_offset += truesize; +#endif +} + +static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer, + union igc_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IGC_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* build an skb around the page buffer */ + skb = build_skb(va - IGC_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, IGC_SKB_PAD); + __skb_put(skb, size); + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} + +static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer, + union igc_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + unsigned int headlen; + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN); + if (unlikely(!skb)) + return NULL; + + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IGC_RX_HDR_LEN) + headlen = eth_get_headlen(va, IGC_RX_HDR_LEN); + + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + (va + headlen) - page_address(rx_buffer->page), + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + } else { + rx_buffer->pagecnt_bias++; + } + + return skb; +} + +/** + * igc_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + */ +static void igc_reuse_rx_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *old_buff) +{ + u16 nta = rx_ring->next_to_alloc; + struct igc_rx_buffer *new_buff; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* Transfer page from old buffer to new buffer. + * Move each member individually to avoid possible store + * forwarding stalls. 
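+ * (A single struct copy of the four fields could compile to wider + * loads/stores that defeat store-to-load forwarding on some CPUs.)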
+ */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; +} + +static inline bool igc_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); +} + +static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) +{ + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* avoid re-using remote pages */ + if (unlikely(igc_page_is_reserved(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + return false; +#else +#define IGC_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) + + if (rx_buffer->page_offset > IGC_LAST_OFFSET) + return false; +#endif + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + + return true; +} + +/** + * igc_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + */ +static bool igc_is_non_eop(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IGC_RX_DESC(rx_ring, ntc)); + + if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * igc_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed.
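+ * e.g. a descriptor flagged in IGC_RXDEXT_ERR_FRAME_ERR_MASK is + * dropped here (and true returned) unless NETIF_F_RXALL asked for bad + * frames to be delivered anyway.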
+ */ +static bool igc_cleanup_headers(struct igc_ring *rx_ring, + union igc_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely((igc_test_staterr(rx_desc, + IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) { + struct net_device *netdev = rx_ring->netdev; + + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +static void igc_put_rx_buffer(struct igc_ring *rx_ring, + struct igc_rx_buffer *rx_buffer) +{ + if (igc_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + igc_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; +} + +/** + * igc_alloc_rx_buffers - Replace used receive buffers + * @rx_ring: rx descriptor ring to refill + * @cleaned_count: number of buffers to replace + */ +static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) +{ + union igc_adv_rx_desc *rx_desc; + u16 i = rx_ring->next_to_use; + struct igc_rx_buffer *bi; + u16 bufsz; + + /* nothing to do */ + if (!cleaned_count) + return; + + rx_desc = IGC_RX_DESC(rx_ring, i); + bi = &rx_ring->rx_buffer_info[i]; + i -= rx_ring->count; + + bufsz = igc_rx_bufsz(rx_ring); + + do { + if (!igc_alloc_mapped_page(rx_ring, bi)) + break; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, bufsz, + DMA_FROM_DEVICE); + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + + rx_desc++; + bi++; + i++; + if (unlikely(!i)) { + rx_desc = IGC_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buffer_info; + i -= rx_ring->count; + } + + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; + + cleaned_count--; + } while (cleaned_count); + + i += rx_ring->count; + + if (rx_ring->next_to_use != i) { + /* record the next descriptor to use */ + rx_ring->next_to_use = i; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = i; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64).
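+ * The tail write below then transfers ownership of everything up to + * (but not including) index i to the hardware.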
+ */ + wmb(); + writel(i, rx_ring->tail); + } +} + +static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) +{ + unsigned int total_bytes = 0, total_packets = 0; + struct igc_ring *rx_ring = q_vector->rx.ring; + struct sk_buff *skb = rx_ring->skb; + u16 cleaned_count = igc_desc_unused(rx_ring); + + while (likely(total_packets < budget)) { + union igc_adv_rx_desc *rx_desc; + struct igc_rx_buffer *rx_buffer; + unsigned int size; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGC_RX_BUFFER_WRITE) { + igc_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * descriptor has been written back + */ + dma_rmb(); + + rx_buffer = igc_get_rx_buffer(rx_ring, size); + + /* retrieve a buffer from the ring */ + if (skb) + igc_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); + else + skb = igc_construct_skb(rx_ring, rx_buffer, + rx_desc, size); + + /* exit if we failed to retrieve a buffer */ + if (!skb) { + rx_ring->rx_stats.alloc_failed++; + rx_buffer->pagecnt_bias++; + break; + } + + igc_put_rx_buffer(rx_ring, rx_buffer); + cleaned_count++; + + /* fetch next buffer in frame if non-eop */ + if (igc_is_non_eop(rx_ring, rx_desc)) + continue; + + /* verify the packet layout is correct */ + if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + igc_process_skb_fields(rx_ring, rx_desc, skb); + + napi_gro_receive(&q_vector->napi, skb); + + /* reset skb pointer */ + skb = NULL; + + /* update budget accounting */ + total_packets++; + } + + /* place incomplete frames back on ring for completion */ + rx_ring->skb = skb; + + u64_stats_update_begin(&rx_ring->rx_syncp); + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + u64_stats_update_end(&rx_ring->rx_syncp); + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igc_alloc_rx_buffers(rx_ring, cleaned_count); + + return total_packets; +} + +static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? 
IGC_SKB_PAD : 0; +} + +static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + igc_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = igc_rx_offset(rx_ring); + bi->pagecnt_bias = 1; + + return true; +} + +/** + * igc_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: pointer to q_vector containing needed info + * @napi_budget: Used to determine if we are in netpoll + * + * returns true if ring is completely cleaned + */ +static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) +{ + struct igc_adapter *adapter = q_vector->adapter; + unsigned int total_bytes = 0, total_packets = 0; + unsigned int budget = q_vector->tx.work_limit; + struct igc_ring *tx_ring = q_vector->tx.ring; + unsigned int i = tx_ring->next_to_clean; + struct igc_tx_buffer *tx_buffer; + union igc_adv_tx_desc *tx_desc; + + if (test_bit(__IGC_DOWN, &adapter->state)) + return true; + + tx_buffer = &tx_ring->tx_buffer_info[i]; + tx_desc = IGC_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + smp_rmb(); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buffer->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + + /* free the skb */ + napi_consume_skb(tx_buffer->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + dma_unmap_len_set(tx_buffer, len, 0); + + /* clear last DMA location and unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGC_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGC_TX_DESC(tx_ring, 0); + } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + netdev_tx_completed_queue(txring_txq(tx_ring), + 
total_packets, total_bytes); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; + u64_stats_update_end(&tx_ring->tx_syncp); + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; + + if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { + struct igc_hw *hw = &adapter->hw; + + /* Detect a transmit hang in hardware, this serializes the + * check with the clearing of time_stamp and movement of i + */ + clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); + if (tx_buffer->next_to_watch && + time_after(jiffies, tx_buffer->time_stamp + + (adapter->tx_timeout_factor * HZ)) && + !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { + /* detected Tx unit hang */ + dev_err(tx_ring->dev, + "Detected Tx Unit Hang\n" + " Tx Queue <%d>\n" + " TDH <%x>\n" + " TDT <%x>\n" + " next_to_use <%x>\n" + " next_to_clean <%x>\n" + "buffer_info[next_to_clean]\n" + " time_stamp <%lx>\n" + " next_to_watch <%p>\n" + " jiffies <%lx>\n" + " desc.status <%x>\n", + tx_ring->queue_index, + rd32(IGC_TDH(tx_ring->reg_idx)), + readl(tx_ring->tail), + tx_ring->next_to_use, + tx_ring->next_to_clean, + tx_buffer->time_stamp, + tx_buffer->next_to_watch, + jiffies, + tx_buffer->next_to_watch->wb.status); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + /* we are about to reset, no point in enabling stuff */ + return true; + } + } + +#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) + if (unlikely(total_packets && + netif_carrier_ok(tx_ring->netdev) && + igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) && + !(test_bit(__IGC_DOWN, &adapter->state))) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.restart_queue++; + u64_stats_update_end(&tx_ring->tx_syncp); + } + } + + return !!budget; +} + +/** + * igc_ioctl - I/O control method + * @netdev: network interface device structure + * @ifr: pointer to interface request structure + * @cmd: ioctl command + */ +static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + default: + return -EOPNOTSUPP; + } +} + +/** + * igc_up - Open the interface and prepare it to handle traffic + * @adapter: board private structure + */ +static void igc_up(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i = 0; + + /* hardware has been reset, we need to reload some things */ + igc_configure(adapter); + + clear_bit(__IGC_DOWN, &adapter->state); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_enable(&adapter->q_vector[i]->napi); + + if (adapter->msix_entries) + igc_configure_msix(adapter); + else + igc_assign_vector(adapter->q_vector[0], 0); + + /* Clear any pending interrupts. */ + rd32(IGC_ICR); + igc_irq_enable(adapter); + + netif_tx_start_all_queues(adapter->netdev); + + /* start the watchdog.
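+ * Forcing get_link_status here makes the first watchdog pass query + * the PHY for the real link state instead of trusting a stale cached + * value.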
*/ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); +} + +/** + * igc_update_stats - Update the board statistics counters + * @adapter: board private structure + */ +static void igc_update_stats(struct igc_adapter *adapter) +{ +} + +static void igc_nfc_filter_exit(struct igc_adapter *adapter) +{ +} + +/** + * igc_down - Close the interface + * @adapter: board private structure + */ +static void igc_down(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct igc_hw *hw = &adapter->hw; + u32 tctl, rctl; + int i = 0; + + set_bit(__IGC_DOWN, &adapter->state); + + /* disable receives in the hardware */ + rctl = rd32(IGC_RCTL); + wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); + /* flush and sleep below */ + + igc_nfc_filter_exit(adapter); + + /* set trans_start so we don't get spurious watchdogs during reset */ + netif_trans_update(netdev); + + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + /* disable transmits in the hardware */ + tctl = rd32(IGC_TCTL); + tctl &= ~IGC_TCTL_EN; + wr32(IGC_TCTL, tctl); + /* flush both disables and wait for them to finish */ + wrfl(); + usleep_range(10000, 20000); + + igc_irq_disable(adapter); + + adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; + + for (i = 0; i < adapter->num_q_vectors; i++) { + if (adapter->q_vector[i]) { + napi_synchronize(&adapter->q_vector[i]->napi); + napi_disable(&adapter->q_vector[i]->napi); + } + } + + del_timer_sync(&adapter->watchdog_timer); + del_timer_sync(&adapter->phy_info_timer); + + /* record the stats before reset */ + spin_lock(&adapter->stats64_lock); + igc_update_stats(adapter); + spin_unlock(&adapter->stats64_lock); + + adapter->link_speed = 0; + adapter->link_duplex = 0; + + if (!pci_channel_offline(adapter->pdev)) + igc_reset(adapter); + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; + + igc_clean_all_tx_rings(adapter); + igc_clean_all_rx_rings(adapter); +} + +static void igc_reinit_locked(struct igc_adapter *adapter) +{ + WARN_ON(in_interrupt()); + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + igc_down(adapter); + igc_up(adapter); + clear_bit(__IGC_RESETTING, &adapter->state); +} + +static void igc_reset_task(struct work_struct *work) +{ + struct igc_adapter *adapter; + + adapter = container_of(work, struct igc_adapter, reset_task); + + netdev_err(adapter->netdev, "Reset adapter\n"); + igc_reinit_locked(adapter); +} + +/** + * igc_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + */ +static int igc_change_mtu(struct net_device *netdev, int new_mtu) +{ + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct igc_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; + + while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + /* igc_down has a dependency on max_frame_size */ + adapter->max_frame_size = max_frame; + + if (netif_running(netdev)) + igc_down(adapter); + + dev_info(&pdev->dev, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + netdev->mtu = new_mtu; + + if (netif_running(netdev)) + igc_up(adapter); + else + igc_reset(adapter); + + clear_bit(__IGC_RESETTING,
&adapter->state); + + return 0; +} + +/** + * igc_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are updated here and also from the timer callback. + */ +static struct net_device_stats *igc_get_stats(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + if (!test_bit(__IGC_RESETTING, &adapter->state)) + igc_update_stats(adapter); + + /* only return the current stats */ + return &netdev->stats; +} + +/** + * igc_configure - configure the hardware for RX and TX + * @adapter: private board structure + */ +static void igc_configure(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i = 0; + + igc_get_hw_control(adapter); + igc_set_rx_mode(netdev); + + igc_setup_tctl(adapter); + igc_setup_mrqc(adapter); + igc_setup_rctl(adapter); + + igc_configure_tx(adapter); + igc_configure_rx(adapter); + + igc_rx_fifo_flush_base(&adapter->hw); + + /* call igc_desc_unused which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean + */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + + igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); + } +} + +/** + * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table + * @adapter: Pointer to adapter structure + * @index: Index of the RAR entry which needs to be synced with MAC table + */ +static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) +{ + u8 *addr = adapter->mac_table[index].addr; + struct igc_hw *hw = &adapter->hw; + u32 rar_low, rar_high; + + /* HW expects these to be in network order when they are plugged + * into the registers which are little endian. To guarantee that + * ordering we need to do a leXX_to_cpup here so we are ready for + * the byteswap that occurs with writel + */ + rar_low = le32_to_cpup((__le32 *)(addr)); + rar_high = le16_to_cpup((__le16 *)(addr + 4)); + + /* Indicate to hardware the Address is Valid. */ + if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { + if (is_valid_ether_addr(addr)) + rar_high |= IGC_RAH_AV; + + rar_high |= IGC_RAH_POOL_1 << + adapter->mac_table[index].queue; + } + + wr32(IGC_RAL(index), rar_low); + wrfl(); + wr32(IGC_RAH(index), rar_high); + wrfl(); +} + +/* Set default MAC address for the PF in the first RAR entry */ +static void igc_set_default_mac_filter(struct igc_adapter *adapter) +{ + struct igc_mac_addr *mac_table = &adapter->mac_table[0]; + + ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); + mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, 0); +} + +/** + * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior.
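+ * (The body below is still an empty stub in this initial submission; + * the entry point is wired up so the configure path can already call + * it.)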
+ */ +static void igc_set_rx_mode(struct net_device *netdev) +{ +} + +/** + * igc_msix_other - msix other interrupt handler + * @irq: interrupt number + * @data: pointer to the adapter + */ +static irqreturn_t igc_msix_other(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_hw *hw = &adapter->hw; + u32 icr = rd32(IGC_ICR); + + /* reading ICR causes bit 31 of EICR to be cleared */ + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & IGC_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + wr32(IGC_EIMS, adapter->eims_other); + + return IRQ_HANDLED; +} + +/** + * igc_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset within IVAR, should be multiple of 8 + * + * The IVAR table consists of 2 columns, + * each containing a cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + */ +static void igc_write_ivar(struct igc_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = array_rd32(IGC_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | IGC_IVAR_VALID) << offset; + + array_wr32(IGC_IVAR0, index, ivar); +} + +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + int rx_queue = IGC_N0_QUEUE; + int tx_queue = IGC_N0_QUEUE; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case igc_i225: + if (rx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = BIT(msix_vector); + break; + default: + WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igc_configure_msix - Configure MSI-X hardware + * @adapter: Pointer to adapter structure + * + * igc_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + */ +static void igc_configure_msix(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i, vector = 0; + u32 tmp; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case igc_i225: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug.
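+ * Vector 0 is reserved below for the "other" causes handler: + * IVAR_MISC is written with (0 | IGC_IVAR_VALID) << 8 so link-change + * interrupts land on it, and the queue vectors are assigned from + * vector 1 onwards.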
+ */ + wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | + IGC_GPIE_PBA | IGC_GPIE_EIAME | + IGC_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = BIT(vector); + tmp = (vector++ | IGC_IVAR_VALID) << 8; + + wr32(IGC_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igc_assign_vector(adapter->q_vector[i], vector++); + + wrfl(); +} + +static irqreturn_t igc_msix_ring(int irq, void *data) +{ + struct igc_q_vector *q_vector = data; + + /* Write the ITR value calculated from the previous interrupt. */ + igc_write_itr(q_vector); + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igc_request_msix - Initialize MSI-X interrupts + * @adapter: Pointer to adapter structure + * + * igc_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. + */ +static int igc_request_msix(struct igc_adapter *adapter) +{ + int i = 0, err = 0, vector = 0, free_vector = 0; + struct net_device *netdev = adapter->netdev; + + err = request_irq(adapter->msix_entries[vector].vector, + &igc_msix_other, 0, netdev->name, adapter); + if (err) + goto err_out; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igc_q_vector *q_vector = adapter->q_vector[i]; + + vector++; + + q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); + + if (q_vector->rx.ring && q_vector->tx.ring) + sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else if (q_vector->tx.ring) + sprintf(q_vector->name, "%s-tx-%u", netdev->name, + q_vector->tx.ring->queue_index); + else if (q_vector->rx.ring) + sprintf(q_vector->name, "%s-rx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else + sprintf(q_vector->name, "%s-unused", netdev->name); + + err = request_irq(adapter->msix_entries[vector].vector, + igc_msix_ring, 0, q_vector->name, + q_vector); + if (err) + goto err_free; + } + + igc_configure_msix(adapter); + return 0; + +err_free: + /* free already assigned IRQs */ + free_irq(adapter->msix_entries[free_vector++].vector, adapter); + + vector--; + for (i = 0; i < vector; i++) { + free_irq(adapter->msix_entries[free_vector++].vector, + adapter->q_vector[i]); + } +err_out: + return err; +} + +/** + * igc_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igc_free_q_vector. 
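+ * Detaching the rings from adapter->tx_ring/rx_ring here ensures no + * datapath lookup can reach a vector whose memory is about to be + * freed.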
+ */ +static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* if we're coming from igc_set_interrupt_capability, the vectors are + * not yet allocated + */ + if (!q_vector) + return; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; + + netif_napi_del(&q_vector->napi); +} + +static void igc_reset_interrupt_capability(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGC_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } + + while (v_idx--) + igc_reset_q_vector(adapter, v_idx); +} + +/** + * igc_clear_interrupt_scheme - reset the device to a state of no interrupts + * @adapter: Pointer to adapter structure + * + * This function resets the device so that it has 0 rx queues, tx queues, and + * MSI-X interrupts allocated. + */ +static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) +{ + igc_free_q_vectors(adapter); + igc_reset_interrupt_capability(adapter); +} + +/** + * igc_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + */ +static void igc_free_q_vectors(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) { + igc_reset_q_vector(adapter, v_idx); + igc_free_q_vector(adapter, v_idx); + } +} + +/** + * igc_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. + */ +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + adapter->q_vector[v_idx] = NULL; + + /* igc_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + if (q_vector) + kfree_rcu(q_vector, rcu); +} + +/* Need to wait a few seconds after link up to get diagnostic information from + * the phy + */ +static void igc_update_phy_info(struct timer_list *t) +{ + struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); + + igc_get_phy_info(&adapter->hw); +} + +/** + * igc_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info + */ +static bool igc_has_link(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + bool link_active = false; + + /* get_link_status is set on LSC (link status) interrupt or + * rx sequence error interrupt. 
get_link_status will stay
+	 * false until the igc_check_for_link establishes link
+	 * for copper adapters ONLY
+	 */
+	switch (hw->phy.media_type) {
+	case igc_media_type_copper:
+		if (!hw->mac.get_link_status)
+			return true;
+		hw->mac.ops.check_for_link(hw);
+		link_active = !hw->mac.get_link_status;
+		break;
+	default:
+	case igc_media_type_unknown:
+		break;
+	}
+
+	if (hw->mac.type == igc_i225 &&
+	    hw->phy.id == I225_I_PHY_ID) {
+		if (!netif_carrier_ok(adapter->netdev)) {
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
+			adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
+			adapter->link_check_timeout = jiffies;
+		}
+	}
+
+	return link_active;
+}
+
+/**
+ * igc_watchdog - Timer Call-back
+ * @t: pointer to the watchdog timer embedded in the adapter
+ */
+static void igc_watchdog(struct timer_list *t)
+{
+	struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igc_watchdog_task(struct work_struct *work)
+{
+	struct igc_adapter *adapter = container_of(work,
+						   struct igc_adapter,
+						   watchdog_task);
+	struct net_device *netdev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_data, retry_count = 20;
+	u32 connsw;
+	u32 link;
+	int i;
+
+	link = igc_has_link(adapter);
+
+	if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
+		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		else
+			link = false;
+	}
+
+	/* Force link down if we have fiber to swap to */
+	if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+		if (hw->phy.media_type == igc_media_type_copper) {
+			connsw = rd32(IGC_CONNSW);
+			if (!(connsw & IGC_CONNSW_AUTOSENSE_EN))
+				link = 0;
+		}
+	}
+	if (link) {
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+
+			hw->mac.ops.get_speed_and_duplex(hw,
+							 &adapter->link_speed,
+							 &adapter->link_duplex);
+
+			ctrl = rd32(IGC_CTRL);
+			/* Link status message must follow this format */
+			netdev_info(netdev,
+				    "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+				    netdev->name,
+				    adapter->link_speed,
+				    adapter->link_duplex == FULL_DUPLEX ?
+				    "Full" : "Half",
+				    (ctrl & IGC_CTRL_TFCE) &&
+				    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
+				    (ctrl & IGC_CTRL_RFCE) ? "RX" :
+				    (ctrl & IGC_CTRL_TFCE) ? "TX" : "None");
+
+			/* check if SmartSpeed worked */
+			igc_check_downshift(hw);
+			if (phy->speed_downgraded)
+				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 14;
+				break;
+			case SPEED_100:
+				/* maybe add some timeout factor ? */
+				break;
+			}
+
+			if (adapter->link_speed != SPEED_1000)
+				goto no_wait;
+
+			/* wait for Remote receiver status OK */
+retry_read_status:
+			if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
+					      &phy_data)) {
+				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+				    retry_count) {
+					msleep(100);
+					retry_count--;
+					goto retry_read_status;
+				} else if (!retry_count) {
+					dev_err(&adapter->pdev->dev, "timed out waiting for Remote receiver status OK\n");
+				}
+			} else {
+				dev_err(&adapter->pdev->dev, "failed to read 1000Base-T Status register\n");
+			}
+no_wait:
+			netif_carrier_on(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+
+			/* Link status message must follow this format */
+			netdev_info(netdev, "igc: %s NIC Link is Down\n",
+				    netdev->name);
+			netif_carrier_off(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+
+			/* link is down, time to check for alternate media */
+			if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
+				if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+					schedule_work(&adapter->reset_task);
+					/* return immediately */
+					return;
+				}
+			}
+
+		/* also check for alternate media here */
+		} else if (!netif_carrier_ok(netdev) &&
+			   (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
+			if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
+				schedule_work(&adapter->reset_task);
+				/* return immediately */
+				return;
+			}
+		}
+	}
+
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		if (!netif_carrier_ok(netdev)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
+				adapter->tx_timeout_count++;
+				schedule_work(&adapter->reset_task);
+				/* return immediately since reset is imminent */
+				return;
+			}
+		}
+
+		/* Force detection of hung controller every watchdog period */
+		set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		u32 eics = 0;
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
+		wr32(IGC_EICS, eics);
+	} else {
+		wr32(IGC_ICS, IGC_ICS_RXDMT0);
+	}
+
+	/* Reset the timer */
+	if (!test_bit(__IGC_DOWN, &adapter->state)) {
+		if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + HZ));
+		else
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + 2 * HZ));
+	}
+}
+
+/**
+ * igc_update_ring_itr - update the dynamic ITR value based on packet size
+ * @q_vector: pointer to q_vector
+ *
+ * Stores a new ITR value based strictly on packet size. This
+ * algorithm is less sophisticated than that used in igc_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings. The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
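+ *
+ * For example (illustrative arithmetic only): with 1500-byte frames,
+ * avg_wire_size becomes 1500 + 24 = 1524, which falls outside the
+ * 300..1200 mid-size window below, so the new ITR value is
+ * 1524 / 2 = 762.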
+ * NOTE: This function is called only when operating in a multiqueue + * receive environment. + */ +static void igc_update_ring_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks. + */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGC_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if (avg_wire_size > 300 && avg_wire_size < 1200) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGC_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGC_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +/** + * igc_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * NOTE: These calculations are only valid when operating in a single- + * queue environment. 
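+ *
+ * Example (illustrative): 20 packets totalling 30000 bytes while in
+ * low_latency gives bytes > 10000 and bytes/packets = 1500 > 1200,
+ * so the ring container is moved to bulk_latency.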
+ */ +static void igc_update_itr(struct igc_q_vector *q_vector, + struct igc_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 10) || ((bytes / packets) > 1200)) + itrval = bulk_latency; + else if ((packets > 35)) + itrval = lowest_latency; + } else if (bytes / packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +/** + * igc_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + */ +static irqreturn_t igc_intr_msi(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_q_vector *q_vector = adapter->q_vector[0]; + struct igc_hw *hw = &adapter->hw; + /* read ICR disables interrupts using IAM */ + u32 icr = rd32(IGC_ICR); + + igc_write_itr(q_vector); + + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { + hw->mac.get_link_status = 1; + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igc_intr - Legacy Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + */ +static irqreturn_t igc_intr(int irq, void *data) +{ + struct igc_adapter *adapter = data; + struct igc_q_vector *q_vector = adapter->q_vector[0]; + struct igc_hw *hw = &adapter->hw; + /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No + * need for the IMC write + */ + u32 icr = rd32(IGC_ICR); + + /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is + * not set, then the adapter didn't send an interrupt + */ + if (!(icr & IGC_ICR_INT_ASSERTED)) + return IRQ_NONE; + + igc_write_itr(q_vector); + + if (icr & IGC_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & IGC_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGC_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +static void igc_set_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGC_4K_ITR; + goto set_itr_now; + default: + break; + } + + igc_update_itr(q_vector, &q_vector->tx); + igc_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing + */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. 
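+		 * (That deferred write is performed by igc_write_itr(),
+		 * which the interrupt handlers call before napi_schedule().)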
+ */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +static void igc_ring_irq_enable(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if (adapter->num_q_vectors == 1) + igc_set_itr(q_vector); + else + igc_update_ring_itr(q_vector); + } + + if (!test_bit(__IGC_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(IGC_EIMS, q_vector->eims_value); + else + igc_irq_enable(adapter); + } +} + +/** + * igc_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + */ +static int igc_poll(struct napi_struct *napi, int budget) +{ + struct igc_q_vector *q_vector = container_of(napi, + struct igc_q_vector, + napi); + bool clean_complete = true; + int work_done = 0; + + if (q_vector->tx.ring) + clean_complete = igc_clean_tx_irq(q_vector, budget); + + if (q_vector->rx.ring) { + int cleaned = igc_clean_rx_irq(q_vector, budget); + + work_done += cleaned; + if (cleaned >= budget) + clean_complete = false; + } + + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* If not enough Rx work done, exit the polling mode */ + napi_complete_done(napi, work_done); + igc_ring_irq_enable(q_vector); + + return 0; +} + +/** + * igc_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: Pointer to adapter structure + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + */ +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix) +{ + int numvecs, i; + int err; + + if (!msix) + goto msi_only; + adapter->flags |= IGC_FLAG_HAS_MSIX; + + /* Number of supported queues. 
*/ + adapter->num_rx_queues = adapter->rss_queues; + + adapter->num_tx_queues = adapter->rss_queues; + + /* start with one vector for every Rx queue */ + numvecs = adapter->num_rx_queues; + + /* if Tx handler is separate add 1 for every Tx queue */ + if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + + adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) + return; + + /* populate entry values */ + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) + return; + + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + + igc_reset_interrupt_capability(adapter); + +msi_only: + adapter->flags &= ~IGC_FLAG_HAS_MSIX; + + adapter->rss_queues = 1; + adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + if (!pci_enable_msi(adapter->pdev)) + adapter->flags |= IGC_FLAG_HAS_MSI; +} + +static void igc_add_ring(struct igc_ring *ring, + struct igc_ring_container *head) +{ + head->ring = ring; + head->count++; +} + +/** + * igc_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. 
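+ *
+ * Note that the q_vector and the rings it services come from a single
+ * allocation (sizeof(struct igc_q_vector) plus one struct igc_ring per
+ * ring), so freeing the q_vector also frees its rings.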
+ */ +static int igc_alloc_q_vector(struct igc_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igc_q_vector *q_vector; + struct igc_ring *ring; + int ring_count, size; + + /* igc only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + size = sizeof(struct igc_q_vector) + + (sizeof(struct igc_ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) + q_vector = kzalloc(size, GFP_KERNEL); + else + memset(q_vector, 0, size); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igc_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->io_addr + IGC_EITR(0); + q_vector->itr_val = IGC_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* initialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igc_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igc_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igc_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. 
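+ *
+ * For example (illustrative): with 4 Rx and 4 Tx queues, 8 vectors give
+ * each ring a dedicated vector, while 4 vectors make each vector
+ * service one Tx/Rx pair.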
+ */ +static int igc_alloc_q_vectors(struct igc_adapter *adapter) +{ + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igc_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igc_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. + */ +static void igc_cache_ring_register(struct igc_adapter *adapter) +{ + int i = 0, j = 0; + + switch (adapter->hw.mac.type) { + case igc_i225: + /* Fall through */ + default: + for (; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->reg_idx = i; + for (; j < adapter->num_tx_queues; j++) + adapter->tx_ring[j]->reg_idx = j; + break; + } +} + +/** + * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * @adapter: Pointer to adapter structure + * + * This function initializes the interrupts and allocates all of the queues. 
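+ *
+ * Ordering matters here: interrupt capability (MSI-X/MSI) is negotiated
+ * first, then the q_vectors are allocated, and finally the descriptor
+ * ring to register mapping is cached.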
+ */ +static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + igc_set_interrupt_capability(adapter, msix); + + err = igc_alloc_q_vectors(adapter); + if (err) { + dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + igc_cache_ring_register(adapter); + + return 0; + +err_alloc_q_vectors: + igc_reset_interrupt_capability(adapter); + return err; +} + +static void igc_free_irq(struct igc_adapter *adapter) +{ + if (adapter->msix_entries) { + int vector = 0, i; + + free_irq(adapter->msix_entries[vector++].vector, adapter); + + for (i = 0; i < adapter->num_q_vectors; i++) + free_irq(adapter->msix_entries[vector++].vector, + adapter->q_vector[i]); + } else { + free_irq(adapter->pdev->irq, adapter); + } +} + +/** + * igc_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + */ +static void igc_irq_disable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 regval = rd32(IGC_EIAM); + + wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); + wr32(IGC_EIMC, adapter->eims_enable_mask); + regval = rd32(IGC_EIAC); + wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); + } + + wr32(IGC_IAM, 0); + wr32(IGC_IMC, ~0); + wrfl(); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +/** + * igc_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + */ +static void igc_irq_enable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; + u32 regval = rd32(IGC_EIAC); + + wr32(IGC_EIAC, regval | adapter->eims_enable_mask); + regval = rd32(IGC_EIAM); + wr32(IGC_EIAM, regval | adapter->eims_enable_mask); + wr32(IGC_EIMS, adapter->eims_enable_mask); + wr32(IGC_IMS, ims); + } else { + wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + } +} + +/** + * igc_request_irq - initialize interrupts + * @adapter: Pointer to adapter structure + * + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. 
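+ *
+ * The fallback order is MSI-X, then MSI, then a legacy shared IRQ,
+ * mirroring the flow of the function body below.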
+ */
+static int igc_request_irq(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		err = igc_request_msix(adapter);
+		if (!err)
+			goto request_done;
+		/* fall back to MSI */
+		igc_free_all_tx_resources(adapter);
+		igc_free_all_rx_resources(adapter);
+
+		igc_clear_interrupt_scheme(adapter);
+		err = igc_init_interrupt_scheme(adapter, false);
+		if (err)
+			goto request_done;
+		igc_setup_all_tx_resources(adapter);
+		igc_setup_all_rx_resources(adapter);
+		igc_configure(adapter);
+	}
+
+	igc_assign_vector(adapter->q_vector[0], 0);
+
+	if (adapter->flags & IGC_FLAG_HAS_MSI) {
+		err = request_irq(pdev->irq, &igc_intr_msi, 0,
+				  netdev->name, adapter);
+		if (!err)
+			goto request_done;
+
+		/* fall back to legacy interrupts */
+		igc_reset_interrupt_capability(adapter);
+		adapter->flags &= ~IGC_FLAG_HAS_MSI;
+	}
+
+	err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
+			  netdev->name, adapter);
+
+	if (err)
+		dev_err(&pdev->dev, "Error %d getting interrupt\n",
+			err);
+
+request_done:
+	return err;
+}
+
+static void igc_write_itr(struct igc_q_vector *q_vector)
+{
+	u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
+
+	if (!q_vector->set_itr)
+		return;
+
+	if (!itr_val)
+		itr_val = IGC_ITR_VAL_MASK;
+
+	itr_val |= IGC_EITR_CNT_IGNR;
+
+	writel(itr_val, q_vector->itr_register);
+	q_vector->set_itr = 0;
+}
+
+/**
+ * igc_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int __igc_open(struct net_device *netdev, bool resuming)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int err = 0;
+	int i = 0;
+
+	/* disallow open during test */
+	if (test_bit(__IGC_TESTING, &adapter->state)) {
+		WARN_ON(resuming);
+		return -EBUSY;
+	}
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = igc_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igc_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	igc_power_up_link(adapter);
+
+	igc_configure(adapter);
+
+	err = igc_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	clear_bit(__IGC_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&adapter->q_vector[i]->napi);
+
+	/* Clear any pending interrupts. */
+	rd32(IGC_ICR);
+	igc_irq_enable(adapter);
+
+	netif_tx_start_all_queues(netdev);
+
+	/* start the watchdog.
*/ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); + + return IGC_SUCCESS; + +err_set_queues: + igc_free_irq(adapter); +err_req_irq: + igc_release_hw_control(adapter); + igc_power_down_link(adapter); + igc_free_all_rx_resources(adapter); +err_setup_rx: + igc_free_all_tx_resources(adapter); +err_setup_tx: + igc_reset(adapter); + + return err; +} + +static int igc_open(struct net_device *netdev) +{ + return __igc_open(netdev, false); +} + +/** + * igc_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the driver's control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + */ +static int __igc_close(struct net_device *netdev, bool suspending) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); + + igc_down(adapter); + + igc_release_hw_control(adapter); + + igc_free_irq(adapter); + + igc_free_all_tx_resources(adapter); + igc_free_all_rx_resources(adapter); + + return 0; +} + +static int igc_close(struct net_device *netdev) +{ + if (netif_device_present(netdev) || netdev->dismantle) + return __igc_close(netdev, false); + return 0; +} + +static const struct net_device_ops igc_netdev_ops = { + .ndo_open = igc_open, + .ndo_stop = igc_close, + .ndo_start_xmit = igc_xmit_frame, + .ndo_set_mac_address = igc_set_mac, + .ndo_change_mtu = igc_change_mtu, + .ndo_get_stats = igc_get_stats, + .ndo_do_ioctl = igc_ioctl, +}; + +/* PCIe configuration access */ +void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, *value); +} + +s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -IGC_ERR_CONFIG; + + pci_read_config_word(adapter->pdev, cap_offset + reg, value); + + return IGC_SUCCESS; +} + +s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) +{ + struct igc_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -IGC_ERR_CONFIG; + + pci_write_config_word(adapter->pdev, cap_offset + reg, *value); + + return IGC_SUCCESS; +} + +u32 igc_rd32(struct igc_hw *hw, u32 reg) +{ + struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); + u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); + u32 value = 0; + + if (IGC_REMOVED(hw_addr)) + return ~value; + + value = readl(&hw_addr[reg]); + + /* reads should not return all F's */ + if (!(~value) && (!reg || !(~readl(hw_addr)))) { + struct net_device *netdev = igc->netdev; + + hw->hw_addr = NULL; + netif_device_detach(netdev); + netdev_err(netdev, "PCIe link lost, device now detached\n"); + } + + return value; +} + +/** + * igc_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igc_pci_tbl + * + * Returns 0 on success, negative on failure + * + * igc_probe initializes an adapter identified by a pci_dev structure. 
+ * The OS initialization, configuring the adapter private structure, + * and a hardware reset occur. + */ +static int igc_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct igc_adapter *adapter; + struct net_device *netdev; + struct igc_hw *hw; + const struct igc_info *ei = igc_info_tbl[ent->driver_data]; + int err, pci_using_dac; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + pci_using_dac = 0; + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + if (!err) { + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(64)); + if (!err) + pci_using_dac = 1; + } else { + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32)); + if (err) { + IGC_ERR("Wrong DMA configuration, aborting\n"); + goto err_dma; + } + } + } + + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, + IORESOURCE_MEM), + igc_driver_name); + if (err) + goto err_pci_reg; + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + err = -ENOMEM; + netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), + IGC_MAX_TX_QUEUES); + + if (!netdev) + goto err_alloc_etherdev; + + SET_NETDEV_DEV(netdev, &pdev->dev); + + pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->port_num = hw->bus.func; + adapter->msg_enable = GENMASK(debug - 1, 0); + + err = pci_save_state(pdev); + if (err) + goto err_ioremap; + + err = -EIO; + adapter->io_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!adapter->io_addr) + goto err_ioremap; + + /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ + hw->hw_addr = adapter->io_addr; + + netdev->netdev_ops = &igc_netdev_ops; + + netdev->watchdog_timeo = 5 * HZ; + + netdev->mem_start = pci_resource_start(pdev, 0); + netdev->mem_end = pci_resource_end(pdev, 0); + + /* PCI config space info */ + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->revision_id = pdev->revision; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + /* Copy the default MAC and PHY function pointers */ + memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); + memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); + + /* Initialize skew-specific constants */ + err = ei->get_invariants(hw); + if (err) + goto err_sw_init; + + /* setup the private structure */ + err = igc_sw_init(adapter); + if (err) + goto err_sw_init; + + /* MTU range: 68 - 9216 */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; + + /* before reading the NVM, reset the controller to put the device in a + * known good starting state + */ + hw->mac.ops.reset_hw(hw); + + if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { + /* copy the MAC address out of the NVM */ + if (hw->mac.ops.read_mac_addr(hw)) + dev_err(&pdev->dev, "NVM Read Error\n"); + } + + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + + if (!is_valid_ether_addr(netdev->dev_addr)) { + dev_err(&pdev->dev, "Invalid MAC Address\n"); + err = -EIO; + goto err_eeprom; + } + + /* configure RXPBSIZE and TXPBSIZE */ + wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); + wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); + + timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); + timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); + + INIT_WORK(&adapter->reset_task, igc_reset_task); + 
INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
+	hw->phy.autoneg_advertised = 0xaf;
+
+	hw->fc.requested_mode = igc_fc_default;
+	hw->fc.current_mode = igc_fc_default;
+
+	/* reset the hardware with the new settings */
+	igc_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igc_get_hw_control(adapter);
+
+	strncpy(netdev->name, "eth%d", IFNAMSIZ);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	/* keep a copy of the board-specific info */
+	adapter->ei = *ei;
+
+	/* print pcie link status and MAC address */
+	pcie_print_link_status(pdev);
+	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
+
+	return 0;
+
+err_register:
+	igc_release_hw_control(adapter);
+err_eeprom:
+	if (!igc_check_reset_block(hw))
+		igc_reset_phy(hw);
+err_sw_init:
+	igc_clear_interrupt_scheme(adapter);
+	iounmap(adapter->io_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_selected_regions(pdev,
+				     pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * igc_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igc_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ */
+static void igc_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+
+	/* Release control of h/w to f/w. If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igc_release_hw_control(adapter);
+	unregister_netdev(netdev);
+
+	igc_clear_interrupt_scheme(adapter);
+	pci_iounmap(pdev, adapter->io_addr);
+	pci_release_mem_regions(pdev);
+
+	kfree(adapter->mac_table);
+	kfree(adapter->shadow_vfta);
+	free_netdev(netdev);
+
+	pci_disable_pcie_error_reporting(pdev);
+
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver igc_driver = {
+	.name     = igc_driver_name,
+	.id_table = igc_pci_tbl,
+	.probe    = igc_probe,
+	.remove   = igc_remove,
+};
+
+static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+				     const u32 max_rss_queues)
+{
+	/* Determine if we need to pair queues: if rss_queues > half of
+	 * max_rss_queues, pair the queues in order to conserve
+	 * interrupts due to limited supply.
+	 */
+	if (adapter->rss_queues > (max_rss_queues / 2))
+		adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+	else
+		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
+}
+
+static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+{
+	unsigned int max_rss_queues;
+
+	/* Determine the maximum number of RSS queues supported.
*/ + max_rss_queues = IGC_MAX_RX_QUEUES; + + return max_rss_queues; +} + +static void igc_init_queue_configuration(struct igc_adapter *adapter) +{ + u32 max_rss_queues; + + max_rss_queues = igc_get_max_rss_queues(adapter); + adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + + igc_set_flag_queue_pairs(adapter, max_rss_queues); +} + +/** + * igc_sw_init - Initialize general software structures (struct igc_adapter) + * @adapter: board private structure to initialize + * + * igc_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + */ +static int igc_sw_init(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + + int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; + + pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + + /* set default ring sizes */ + adapter->tx_ring_count = IGC_DEFAULT_TXD; + adapter->rx_ring_count = IGC_DEFAULT_RXD; + + /* set default ITR values */ + adapter->rx_itr_setting = IGC_DEFAULT_ITR; + adapter->tx_itr_setting = IGC_DEFAULT_ITR; + + /* set default work limits */ + adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; + + /* adjust max frame to be at least the size of a standard frame */ + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + + spin_lock_init(&adapter->nfc_lock); + spin_lock_init(&adapter->stats64_lock); + /* Assume MSI-X interrupts, will be checked during IRQ allocation */ + adapter->flags |= IGC_FLAG_HAS_MSIX; + + adapter->mac_table = kzalloc(size, GFP_ATOMIC); + if (!adapter->mac_table) + return -ENOMEM; + + igc_init_queue_configuration(adapter); + + /* This call may decrease the number of queues */ + if (igc_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igc_irq_disable(adapter); + + set_bit(__IGC_DOWN, &adapter->state); + + return 0; +} + +/** + * igc_get_hw_dev - return device + * @hw: pointer to hardware structure + * + * used by hardware layer to print debugging information + */ +struct net_device *igc_get_hw_dev(struct igc_hw *hw) +{ + struct igc_adapter *adapter = hw->back; + + return adapter->netdev; +} + +/** + * igc_init_module - Driver Registration Routine + * + * igc_init_module is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. + */ +static int __init igc_init_module(void) +{ + int ret; + + pr_info("%s - version %s\n", + igc_driver_string, igc_driver_version); + + pr_info("%s\n", igc_copyright); + + ret = pci_register_driver(&igc_driver); + return ret; +} + +module_init(igc_init_module); + +/** + * igc_exit_module - Driver Exit Cleanup Routine + * + * igc_exit_module is called just before the driver is removed + * from memory. 
+ */ +static void __exit igc_exit_module(void) +{ + pci_unregister_driver(&igc_driver); +} + +module_exit(igc_exit_module); +/* igc_main.c */ diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c new file mode 100644 index 000000000000..58f81aba0144 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_nvm.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include "igc_mac.h" +#include "igc_nvm.h" + +/** + * igc_poll_eerd_eewr_done - Poll for EEPROM read/write completion + * @hw: pointer to the HW structure + * @ee_reg: EEPROM flag for polling + * + * Polls the EEPROM status bit for either read or write completion based + * upon the value of 'ee_reg'. + */ +static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg) +{ + s32 ret_val = -IGC_ERR_NVM; + u32 attempts = 100000; + u32 i, reg = 0; + + for (i = 0; i < attempts; i++) { + if (ee_reg == IGC_NVM_POLL_READ) + reg = rd32(IGC_EERD); + else + reg = rd32(IGC_EEWR); + + if (reg & IGC_NVM_RW_REG_DONE) { + ret_val = 0; + break; + } + + udelay(5); + } + + return ret_val; +} + +/** + * igc_acquire_nvm - Generic request for access to EEPROM + * @hw: pointer to the HW structure + * + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -IGC_ERR_NVM (-1). + */ +s32 igc_acquire_nvm(struct igc_hw *hw) +{ + s32 timeout = IGC_NVM_GRANT_ATTEMPTS; + u32 eecd = rd32(IGC_EECD); + s32 ret_val = 0; + + wr32(IGC_EECD, eecd | IGC_EECD_REQ); + eecd = rd32(IGC_EECD); + + while (timeout) { + if (eecd & IGC_EECD_GNT) + break; + udelay(5); + eecd = rd32(IGC_EECD); + timeout--; + } + + if (!timeout) { + eecd &= ~IGC_EECD_REQ; + wr32(IGC_EECD, eecd); + hw_dbg("Could not acquire NVM grant\n"); + ret_val = -IGC_ERR_NVM; + } + + return ret_val; +} + +/** + * igc_release_nvm - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit. + */ +void igc_release_nvm(struct igc_hw *hw) +{ + u32 eecd; + + eecd = rd32(IGC_EECD); + eecd &= ~IGC_EECD_REQ; + wr32(IGC_EECD, eecd); +} + +/** + * igc_read_nvm_eerd - Reads EEPROM using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the EEPROM to read + * @words: number of words to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + */ +s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct igc_nvm_info *nvm = &hw->nvm; + u32 i, eerd = 0; + s32 ret_val = 0; + + /* A check for invalid values: offset too large, too many words, + * and not enough words. 
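+	 * (words > word_size - offset covers the "too many words" case and
+	 * words == 0 the "not enough words" case.)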
+ */ + if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || + words == 0) { + hw_dbg("nvm parameter(s) out of bounds\n"); + ret_val = -IGC_ERR_NVM; + goto out; + } + + for (i = 0; i < words; i++) { + eerd = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) + + IGC_NVM_RW_REG_START; + + wr32(IGC_EERD, eerd); + ret_val = igc_poll_eerd_eewr_done(hw, IGC_NVM_POLL_READ); + if (ret_val) + break; + + data[i] = (rd32(IGC_EERD) >> IGC_NVM_RW_REG_DATA); + } + +out: + return ret_val; +} + +/** + * igc_read_mac_addr - Read device MAC address + * @hw: pointer to the HW structure + */ +s32 igc_read_mac_addr(struct igc_hw *hw) +{ + u32 rar_high; + u32 rar_low; + u16 i; + + rar_high = rd32(IGC_RAH(0)); + rar_low = rd32(IGC_RAL(0)); + + for (i = 0; i < IGC_RAL_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8)); + + for (i = 0; i < IGC_RAH_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8)); + + for (i = 0; i < ETH_ALEN; i++) + hw->mac.addr[i] = hw->mac.perm_addr[i]; + + return 0; +} + +/** + * igc_validate_nvm_checksum - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + */ +s32 igc_validate_nvm_checksum(struct igc_hw *hw) +{ + u16 checksum = 0; + u16 i, nvm_data; + s32 ret_val = 0; + + for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + hw_dbg("NVM Read Error\n"); + goto out; + } + checksum += nvm_data; + } + + if (checksum != (u16)NVM_SUM) { + hw_dbg("NVM Checksum Invalid\n"); + ret_val = -IGC_ERR_NVM; + goto out; + } + +out: + return ret_val; +} + +/** + * igc_update_nvm_checksum - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. 
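+ *
+ * In other words, the checksum word is chosen so that all words from
+ * offset 0 through NVM_CHECKSUM_REG sum to NVM_SUM (0xBABA), which is
+ * what igc_validate_nvm_checksum() verifies.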
+ */ +s32 igc_update_nvm_checksum(struct igc_hw *hw) +{ + u16 checksum = 0; + u16 i, nvm_data; + s32 ret_val; + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + hw_dbg("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16)NVM_SUM - checksum; + ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); + if (ret_val) + hw_dbg("NVM Write Error while updating checksum.\n"); + +out: + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h new file mode 100644 index 000000000000..f9fc2e9cfb03 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_nvm.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_NVM_H_ +#define _IGC_NVM_H_ + +s32 igc_acquire_nvm(struct igc_hw *hw); +void igc_release_nvm(struct igc_hw *hw); +s32 igc_read_mac_addr(struct igc_hw *hw); +s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data); +s32 igc_validate_nvm_checksum(struct igc_hw *hw); +s32 igc_update_nvm_checksum(struct igc_hw *hw); + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c new file mode 100644 index 000000000000..38e43e6fc1c7 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -0,0 +1,791 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Intel Corporation */ + +#include "igc_phy.h" + +/* forward declaration */ +static s32 igc_phy_setup_autoneg(struct igc_hw *hw); +static s32 igc_wait_autoneg(struct igc_hw *hw); + +/** + * igc_check_reset_block - Check if PHY reset is blocked + * @hw: pointer to the HW structure + * + * Read the PHY management control register and check whether a PHY reset + * is blocked. If a reset is not blocked return 0, otherwise + * return IGC_ERR_BLK_PHY_RESET (12). + */ +s32 igc_check_reset_block(struct igc_hw *hw) +{ + u32 manc; + + manc = rd32(IGC_MANC); + + return (manc & IGC_MANC_BLK_PHY_RST_ON_IDE) ? + IGC_ERR_BLK_PHY_RESET : 0; +} + +/** + * igc_get_phy_id - Retrieve the PHY ID and revision + * @hw: pointer to the HW structure + * + * Reads the PHY registers and stores the PHY ID and possibly the PHY + * revision in the hardware structure. + */ +s32 igc_get_phy_id(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u16 phy_id; + + ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); + if (ret_val) + goto out; + + phy->id = (u32)(phy_id << 16); + usleep_range(200, 500); + ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); + if (ret_val) + goto out; + + phy->id |= (u32)(phy_id & PHY_REVISION_MASK); + phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); + +out: + return ret_val; +} + +/** + * igc_phy_has_link - Polls PHY for link + * @hw: pointer to the HW structure + * @iterations: number of times to poll for link + * @usec_interval: delay between polling attempts + * @success: pointer to whether polling was successful or not + * + * Polls the PHY status register for link, 'iterations' number of times. + */ +s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations, + u32 usec_interval, bool *success) +{ + u16 i, phy_status; + s32 ret_val = 0; + + for (i = 0; i < iterations; i++) { + /* Some PHYs require the PHY_STATUS register to be read + * twice due to the link bit being sticky. No harm doing + * it across the board. 
+ */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val && usec_interval > 0) {
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			if (usec_interval >= 1000)
+				mdelay(usec_interval / 1000);
+			else
+				udelay(usec_interval);
+		}
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_LINK_STATUS)
+			break;
+		if (usec_interval >= 1000)
+			mdelay(usec_interval / 1000);
+		else
+			udelay(usec_interval);
+	}
+
+	*success = (i < iterations) ? true : false;
+
+	return ret_val;
+}
+
+/**
+ * igc_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, restore the link to previous settings.
+ */
+void igc_power_up_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg &= ~MII_CR_POWER_DOWN;
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * igc_power_down_phy_copper - Power down copper PHY
+ * @hw: pointer to the HW structure
+ *
+ * Power down PHY to save power when interface is down and wake on lan
+ * is not enabled.
+ */
+void igc_power_down_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg |= MII_CR_POWER_DOWN;
+
+	/* Temporary workaround - should be removed once the PHY implements
+	 * the IEEE registers properly
+	 */
+	/* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+	usleep_range(1000, 2000);
+}
+
+/**
+ * igc_check_downshift - Checks whether a downshift in speed occurred
+ * @hw: pointer to the HW structure
+ *
+ * Returns 0 on success, otherwise a negative error code.
+ *
+ * A downshift is detected by querying the PHY link health.
+ */
+s32 igc_check_downshift(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_data, offset, mask;
+	s32 ret_val;
+
+	switch (phy->type) {
+	case igc_phy_i225:
+	default:
+		/* speed downshift not supported */
+		phy->speed_downgraded = false;
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->speed_downgraded = (phy_data & mask) ? true : false;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_phy_hw_reset - PHY hardware reset
+ * @hw: pointer to the HW structure
+ *
+ * Verify the reset block is not blocking us from resetting. Acquire
+ * semaphore (if necessary) and read/set/write the device control reset
+ * bit in the PHY. Wait the appropriate delay time for the device to
+ * reset and release the semaphore (if necessary).
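+ *
+ * Note that the reset is driven through the device CTRL register
+ * (IGC_CTRL_PHY_RST) rather than through the PHY's own MII control
+ * register.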
+ */
+s32 igc_phy_hw_reset(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u32 ctrl;
+
+	ret_val = igc_check_reset_block(hw);
+	if (ret_val) {
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ctrl = rd32(IGC_CTRL);
+	wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST);
+	wrfl();
+
+	udelay(phy->reset_delay_us);
+
+	wr32(IGC_CTRL, ctrl);
+	wrfl();
+
+	usleep_range(1500, 2000);
+
+	phy->ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_copper_link_autoneg - Setup/Enable autoneg for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Performs initial bounds checking on autoneg advertisement parameter, then
+ * configure to advertise the full capability. Setup the PHY to autoneg
+ * and restart the negotiation process with the link partner. If
+ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ */
+static s32 igc_copper_link_autoneg(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_ctrl;
+	s32 ret_val;
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (phy->autoneg_advertised == 0)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	hw_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = igc_phy_setup_autoneg(hw);
+	if (ret_val) {
+		hw_dbg("Error Setting up Auto-Negotiation\n");
+		goto out;
+	}
+	hw_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, in a callback routine)?
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = igc_wait_autoneg(hw);
+		if (ret_val) {
+			hw_dbg("Error while waiting for autoneg to complete\n");
+			goto out;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_wait_autoneg - Wait for auto-neg completion
+ * @hw: pointer to the HW structure
+ *
+ * Waits for auto-negotiation to complete or for the auto-negotiation time
+ * limit to expire, whichever happens first.
+ */
+static s32 igc_wait_autoneg(struct igc_hw *hw)
+{
+	u16 i, phy_status;
+	s32 ret_val = 0;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_AUTONEG_COMPLETE)
+			break;
+		msleep(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ * igc_phy_setup_autoneg - Configure PHY for auto-negotiation
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MII auto-neg advertisement register and/or the 1000T control
+ * register and if the PHY is already setup for auto-negotiation, then
+ * return successful. Otherwise, setup advertisement and flow control to
+ * the appropriate values for the wanted auto-negotiation.
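+ *
+ * For example, advertising only 1000BASE-T full duplex clears all of
+ * the 10/100 capability bits in PHY_AUTONEG_ADV and sets
+ * CR_1000T_FD_CAPS in PHY_1000T_CTRL.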
+ */ +static s32 igc_phy_setup_autoneg(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + u16 aneg_multigbt_an_ctrl = 0; + u16 mii_1000t_ctrl_reg = 0; + u16 mii_autoneg_adv_reg; + s32 ret_val; + + phy->autoneg_advertised &= phy->autoneg_mask; + + /* Read the MII Auto-Neg Advertisement Register (Address 4). */ + ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) { + /* Read the MII 1000Base-T Control Register (Address 9). */ + ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, + &mii_1000t_ctrl_reg); + if (ret_val) + return ret_val; + } + + if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && + hw->phy.id == I225_I_PHY_ID) { + /* Read the MULTI GBT AN Control Register - reg 7.32 */ + ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + ANEG_MULTIGBT_AN_CTRL, + &aneg_multigbt_an_ctrl); + + if (ret_val) + return ret_val; + } + + /* Need to parse both autoneg_advertised and fc and set up + * the appropriate PHY registers. First we will parse for + * autoneg_advertised software override. Since we can advertise + * a plethora of combinations, we need to check each bit + * individually. + */ + + /* First we clear all the 10/100 mb speed bits in the Auto-Neg + * Advertisement Register (Address 4) and the 1000 mb speed bits in + * the 1000Base-T Control Register (Address 9). + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | + NWAY_AR_100TX_HD_CAPS | + NWAY_AR_10T_FD_CAPS | + NWAY_AR_10T_HD_CAPS); + mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); + + hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised); + + /* Do we want to advertise 10 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_HALF) { + hw_dbg("Advertise 10mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; + } + + /* Do we want to advertise 10 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_FULL) { + hw_dbg("Advertise 10mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; + } + + /* Do we want to advertise 100 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_HALF) { + hw_dbg("Advertise 100mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; + } + + /* Do we want to advertise 100 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_FULL) { + hw_dbg("Advertise 100mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; + } + + /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ + if (phy->autoneg_advertised & ADVERTISE_1000_HALF) + hw_dbg("Advertise 1000mb Half duplex request denied!\n"); + + /* Do we want to advertise 1000 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { + hw_dbg("Advertise 1000mb Full duplex\n"); + mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; + } + + /* We do not allow the Phy to advertise 2500 Mb Half Duplex */ + if (phy->autoneg_advertised & ADVERTISE_2500_HALF) + hw_dbg("Advertise 2500mb Half duplex request denied!\n"); + + /* Do we want to advertise 2500 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_2500_FULL) { + hw_dbg("Advertise 2500mb Full duplex\n"); + aneg_multigbt_an_ctrl |= CR_2500T_FD_CAPS; + } else { + aneg_multigbt_an_ctrl &= ~CR_2500T_FD_CAPS; + } + + /* Check for a software override of the flow control settings, and + * setup the PHY advertisement registers accordingly. 
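The block of per-speed checks above reduces to a table lookup: each requested speed/duplex flag maps to one capability bit, the 10/100 bits living in the MII advertisement register (register 4) and the 1000 Mb full-duplex bit in the 1000BASE-T control register (register 9). A condensed sketch; the flag and bit values mirror common MII definitions and are assumptions here, not copied from igc headers:

#include <stdint.h>

#define ADV_10_HALF	0x0001
#define ADV_10_FULL	0x0002
#define ADV_100_HALF	0x0004
#define ADV_100_FULL	0x0008
#define ADV_1000_FULL	0x0020

#define AR_10T_HD	0x0020	/* ANAR capability bits (register 4) */
#define AR_10T_FD	0x0040
#define AR_100TX_HD	0x0080
#define AR_100TX_FD	0x0100
#define CR_1000T_FD	0x0200	/* 1000BASE-T control (register 9) */

static void build_adv(uint16_t advertised, uint16_t *anar, uint16_t *gig)
{
	static const struct { uint16_t flag, bit; } map[] = {
		{ ADV_10_HALF,  AR_10T_HD },
		{ ADV_10_FULL,  AR_10T_FD },
		{ ADV_100_HALF, AR_100TX_HD },
		{ ADV_100_FULL, AR_100TX_FD },
	};
	unsigned int i;

	/* Clear the speed bits first, exactly as the driver does. */
	*anar &= ~(AR_10T_HD | AR_10T_FD | AR_100TX_HD | AR_100TX_FD);
	*gig  &= ~CR_1000T_FD;

	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (advertised & map[i].flag)
			*anar |= map[i].bit;

	/* 1000 Mb half duplex is never advertised; full duplex only. */
	if (advertised & ADV_1000_FULL)
		*gig |= CR_1000T_FD;
}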
If + * auto-negotiation is enabled, then software will have to set the + * "PAUSE" bits to the correct value in the Auto-Negotiation + * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- + * negotiation. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * but we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: No software override. The flow control configuration + * in the EEPROM is used. + */ + switch (hw->fc.current_mode) { + case igc_fc_none: + /* Flow control (Rx & Tx) is completely disabled by a + * software over-ride. + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case igc_fc_rx_pause: + /* Rx Flow control is enabled, and Tx Flow control is + * disabled, by a software over-ride. + * + * Since there really isn't a way to advertise that we are + * capable of Rx Pause ONLY, we will advertise that we + * support both symmetric and asymmetric Rx PAUSE. Later + * (in igc_config_fc_after_link_up) we will disable the + * hw's ability to send PAUSE frames. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case igc_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is + * disabled, by a software over-ride. + */ + mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; + mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; + break; + case igc_fc_full: + /* Flow control (both Rx and Tx) is enabled by a software + * over-ride. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + default: + hw_dbg("Flow control param set incorrectly\n"); + return -IGC_ERR_CONFIG; + } + + ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) + ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, + mii_1000t_ctrl_reg); + + if ((phy->autoneg_mask & ADVERTISE_2500_FULL) && + hw->phy.id == I225_I_PHY_ID) + ret_val = phy->ops.write_reg(hw, + (STANDARD_AN_REG_MASK << + MMD_DEVADDR_SHIFT) | + ANEG_MULTIGBT_AN_CTRL, + aneg_multigbt_an_ctrl); + + return ret_val; +} + +/** + * igc_setup_copper_link - Configure copper link settings + * @hw: pointer to the HW structure + * + * Calls the appropriate function to configure the link for auto-neg or forced + * speed and duplex. Then we check for link, once link is established calls + * to configure collision distance and flow control are called. If link is + * not established, we return -IGC_ERR_PHY (-2). + */ +s32 igc_setup_copper_link(struct igc_hw *hw) +{ + s32 ret_val = 0; + bool link; + + if (hw->mac.autoneg) { + /* Setup autoneg and flow control advertisement and perform + * autonegotiation. + */ + ret_val = igc_copper_link_autoneg(hw); + if (ret_val) + goto out; + } else { + /* PHY will be set to 10H, 10F, 100H or 100F + * depending on user settings. + */ + hw_dbg("Forcing Speed and Duplex\n"); + ret_val = hw->phy.ops.force_speed_duplex(hw); + if (ret_val) { + hw_dbg("Error Forcing Speed and Duplex\n"); + goto out; + } + } + + /* Check link status. Wait up to 100 microseconds for link to become + * valid. 
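The flow-control switch above encodes everything in two ANAR bits defined by IEEE 802.3 Annex 28B: PAUSE (symmetric) and ASM_DIR (asymmetric direction). A compact restatement, with the fc_* enum names as assumptions standing in for the igc_fc_* modes:

#include <stdint.h>

#define NWAY_AR_PAUSE	0x0400	/* bit 10: symmetric pause */
#define NWAY_AR_ASM_DIR	0x0800	/* bit 11: asymmetric direction */

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static int apply_fc_bits(enum fc_mode mode, uint16_t *anar)
{
	*anar &= ~(NWAY_AR_PAUSE | NWAY_AR_ASM_DIR);

	switch (mode) {
	case FC_NONE:
		break;			/* advertise no pause at all */
	case FC_RX_PAUSE:
		/* There is no "Rx pause only" encoding, so advertise both
		 * symmetric and asymmetric pause; Tx pause is suppressed
		 * after link-up instead.
		 */
		*anar |= NWAY_AR_PAUSE | NWAY_AR_ASM_DIR;
		break;
	case FC_TX_PAUSE:
		*anar |= NWAY_AR_ASM_DIR;
		break;
	case FC_FULL:
		*anar |= NWAY_AR_PAUSE | NWAY_AR_ASM_DIR;
		break;
	default:
		return -1;		/* bad configuration */
	}
	return 0;
}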
+ */ + ret_val = igc_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link); + if (ret_val) + goto out; + + if (link) { + hw_dbg("Valid link established!!!\n"); + igc_config_collision_dist(hw); + ret_val = igc_config_fc_after_link_up(hw); + } else { + hw_dbg("Unable to establish link!!!\n"); + } + +out: + return ret_val; +} + +/** + * igc_read_phy_reg_mdic - Read MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the MDI control register in the PHY at offset and stores the + * information read to data. + */ +static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data) +{ + struct igc_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + s32 ret_val = 0; + + if (offset > MAX_PHY_REG_ADDRESS) { + hw_dbg("PHY Address %d is out of range\n", offset); + ret_val = -IGC_ERR_PARAM; + goto out; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + mdic = ((offset << IGC_MDIC_REG_SHIFT) | + (phy->addr << IGC_MDIC_PHY_SHIFT) | + (IGC_MDIC_OP_READ)); + + wr32(IGC_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { + usleep_range(500, 1000); + mdic = rd32(IGC_MDIC); + if (mdic & IGC_MDIC_READY) + break; + } + if (!(mdic & IGC_MDIC_READY)) { + hw_dbg("MDI Read did not complete\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + if (mdic & IGC_MDIC_ERROR) { + hw_dbg("MDI Error\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + *data = (u16)mdic; + +out: + return ret_val; +} + +/** + * igc_write_phy_reg_mdic - Write MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write to register at offset + * + * Writes data to MDI control register in the PHY at offset. + */ +static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data) +{ + struct igc_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + s32 ret_val = 0; + + if (offset > MAX_PHY_REG_ADDRESS) { + hw_dbg("PHY Address %d is out of range\n", offset); + ret_val = -IGC_ERR_PARAM; + goto out; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to write the desired data. 
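igc_read_phy_reg_mdic() packs the whole transaction into one 32-bit MDIC word and then polls for READY/ERROR. The field layout below follows the long-standing e1000/igb convention (register offset at bit 16, PHY address at bit 21, opcode at bit 26, READY and ERROR status at bits 28 and 30) and should be read as an illustrative assumption for igc:

#include <stdint.h>
#include <stdio.h>

#define MDIC_REG_SHIFT	16
#define MDIC_PHY_SHIFT	21
#define MDIC_OP_READ	(2u << 26)
#define MDIC_READY	(1u << 28)
#define MDIC_ERROR	(1u << 30)

static uint32_t mdic_read_cmd(uint32_t phy_addr, uint32_t reg)
{
	return (reg << MDIC_REG_SHIFT) |
	       (phy_addr << MDIC_PHY_SHIFT) |
	       MDIC_OP_READ;
}

int main(void)
{
	/* A completed transaction as the MAC would report it: READY set,
	 * ERROR clear, PHY data in the low 16 bits.
	 */
	uint32_t mdic = mdic_read_cmd(1, 2) | MDIC_READY | 0x0141;

	if ((mdic & MDIC_READY) && !(mdic & MDIC_ERROR))
		printf("PHY reg value: 0x%04x\n", (unsigned)(mdic & 0xFFFF));
	return 0;
}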
+ */ + mdic = (((u32)data) | + (offset << IGC_MDIC_REG_SHIFT) | + (phy->addr << IGC_MDIC_PHY_SHIFT) | + (IGC_MDIC_OP_WRITE)); + + wr32(IGC_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { + usleep_range(500, 1000); + mdic = rd32(IGC_MDIC); + if (mdic & IGC_MDIC_READY) + break; + } + if (!(mdic & IGC_MDIC_READY)) { + hw_dbg("MDI Write did not complete\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + if (mdic & IGC_MDIC_ERROR) { + hw_dbg("MDI Error\n"); + ret_val = -IGC_ERR_PHY; + goto out; + } + +out: + return ret_val; +} + +/** + * __igc_access_xmdio_reg - Read/write XMDIO register + * @hw: pointer to the HW structure + * @address: XMDIO address to program + * @dev_addr: device address to program + * @data: pointer to value to read/write from/to the XMDIO address + * @read: boolean flag to indicate read or write + */ +static s32 __igc_access_xmdio_reg(struct igc_hw *hw, u16 address, + u8 dev_addr, u16 *data, bool read) +{ + s32 ret_val; + + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, dev_addr); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, address); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, IGC_MMDAC_FUNC_DATA | + dev_addr); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, IGC_MMDAAD, data); + else + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, *data); + if (ret_val) + return ret_val; + + /* Recalibrate the device back to 0 */ + ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, 0); + if (ret_val) + return ret_val; + + return ret_val; +} + +/** + * igc_read_xmdio_reg - Read XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be read from the EMI address + */ +static s32 igc_read_xmdio_reg(struct igc_hw *hw, u16 addr, + u8 dev_addr, u16 *data) +{ + return __igc_access_xmdio_reg(hw, addr, dev_addr, data, true); +} + +/** + * igc_write_xmdio_reg - Write XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be written to the XMDIO address + */ +static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr, + u8 dev_addr, u16 data) +{ + return __igc_access_xmdio_reg(hw, addr, dev_addr, &data, false); +} + +/** + * igc_write_phy_reg_gpy - Write GPY PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + */ +s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) +{ + u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + s32 ret_val; + + offset = offset & GPY_REG_MASK; + + if (!dev_addr) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + ret_val = igc_write_phy_reg_mdic(hw, offset, data); + if (ret_val) + return ret_val; + hw->phy.ops.release(hw); + } else { + ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr, + data); + } + + return ret_val; +} + +/** + * igc_read_phy_reg_gpy - Read GPY PHY register + * @hw: pointer to the HW structure + * @offset: lower half is register offset to read to + * upper half is MMD to use. 
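__igc_access_xmdio_reg() above is the standard indirect MMD access of IEEE 802.3 Clause 22 (control register 13 and address/data register 14): select the device, latch the register address, switch register 13 to data mode, move the data, then park the function field back at address 0. The same five steps in isolation, with phy_read()/phy_write() as assumed accessors:

#include <stdbool.h>
#include <stdint.h>

#define MII_MMD_CTRL		13	/* IGC_MMDAC */
#define MII_MMD_DATA		14	/* IGC_MMDAAD */
#define MMD_CTRL_FUNC_DATA	0x4000	/* data mode, no post-increment */

extern int phy_read(uint32_t reg, uint16_t *val);
extern int phy_write(uint32_t reg, uint16_t val);

static int mmd_access(uint8_t dev_addr, uint16_t address,
		      uint16_t *data, bool read)
{
	int ret;

	ret = phy_write(MII_MMD_CTRL, dev_addr);	/* select device  */
	if (ret)
		return ret;
	ret = phy_write(MII_MMD_DATA, address);		/* latch address  */
	if (ret)
		return ret;
	ret = phy_write(MII_MMD_CTRL, MMD_CTRL_FUNC_DATA | dev_addr);
	if (ret)
		return ret;
	ret = read ? phy_read(MII_MMD_DATA, data)	/* move the data  */
		   : phy_write(MII_MMD_DATA, *data);
	if (ret)
		return ret;
	return phy_write(MII_MMD_CTRL, 0);		/* back to addr 0 */
}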
+ * @data: data to read at register offset + * + * Acquires semaphore, if necessary, then reads the data in the PHY register + * at the offset. Release any acquired semaphores before exiting. + */ +s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) +{ + u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT; + s32 ret_val; + + offset = offset & GPY_REG_MASK; + + if (!dev_addr) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + ret_val = igc_read_phy_reg_mdic(hw, offset, data); + if (ret_val) + return ret_val; + hw->phy.ops.release(hw); + } else { + ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, + data); + } + + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h new file mode 100644 index 000000000000..25cba33de7e2 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_phy.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_PHY_H_ +#define _IGC_PHY_H_ + +#include "igc_mac.h" + +s32 igc_check_reset_block(struct igc_hw *hw); +s32 igc_phy_hw_reset(struct igc_hw *hw); +s32 igc_get_phy_id(struct igc_hw *hw); +s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations, + u32 usec_interval, bool *success); +s32 igc_check_downshift(struct igc_hw *hw); +s32 igc_setup_copper_link(struct igc_hw *hw); +void igc_power_up_phy_copper(struct igc_hw *hw); +void igc_power_down_phy_copper(struct igc_hw *hw); +s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data); +s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data); + +#endif diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h new file mode 100644 index 000000000000..a1bd3216c906 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Intel Corporation */ + +#ifndef _IGC_REGS_H_ +#define _IGC_REGS_H_ + +/* General Register Descriptions */ +#define IGC_CTRL 0x00000 /* Device Control - RW */ +#define IGC_STATUS 0x00008 /* Device Status - RO */ +#define IGC_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define IGC_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define IGC_MDIC 0x00020 /* MDI Control - RW */ +#define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */ +#define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ + +/* Internal Packet Buffer Size Registers */ +#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ +#define IGC_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ + +/* NVM Register Descriptions */ +#define IGC_EERD 0x12014 /* EEprom mode read - RW */ +#define IGC_EEWR 0x12018 /* EEprom mode write - RW */ + +/* Flow Control Register Descriptions */ +#define IGC_FCAL 0x00028 /* FC Address Low - RW */ +#define IGC_FCAH 0x0002C /* FC Address High - RW */ +#define IGC_FCT 0x00030 /* FC Type - RW */ +#define IGC_FCTTV 0x00170 /* FC Transmit Timer - RW */ +#define IGC_FCRTL 0x02160 /* FC Receive Threshold Low - RW */ +#define IGC_FCRTH 0x02168 /* FC Receive Threshold High - RW */ +#define IGC_FCRTV 0x02460 /* FC Refresh Timer Value - RW */ +#define IGC_FCSTS 0x02464 /* FC Status - RO */ + +/* PCIe Register Description */ +#define IGC_GCR 0x05B00 /* PCIe control- RW */ + +/* Semaphore registers */ +#define IGC_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ +#define IGC_SWSM 0x05B50 /* SW Semaphore */ +#define IGC_FWSM 0x05B54 /* FW Semaphore */ + +/* Function Active and Power State to MNG */ 
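For orientation, the offsets in this header are byte offsets into the device's BAR0 mapping and are consumed through the thin wr32()/rd32() wrappers defined at the end of the file. A user-space model with an array standing in for the mapping (illustration only, no kernel API):

#include <stdint.h>
#include <stdio.h>

#define IGC_CTRL   0x00000
#define IGC_STATUS 0x00008

static uint32_t bar0[0x13000 / 4];	/* pretend BAR0, dword-indexed */

static void wr32(uint32_t reg, uint32_t val)
{
	bar0[reg / 4] = val;		/* writel(val, hw_addr + reg) on HW */
}

static uint32_t rd32(uint32_t reg)
{
	return bar0[reg / 4];		/* readl(hw_addr + reg) on HW */
}

int main(void)
{
	wr32(IGC_CTRL, 0x1);
	/* On hardware, reading STATUS back also flushes posted writes,
	 * which is what the driver's wrfl() macro is for.
	 */
	printf("CTRL=0x%x STATUS=0x%x\n", rd32(IGC_CTRL), rd32(IGC_STATUS));
	return 0;
}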
+#define IGC_FACTPS 0x05B30 + +/* Interrupt Register Description */ +#define IGC_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ +#define IGC_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ +#define IGC_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ +#define IGC_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ +#define IGC_EIAM 0x01530 /* Ext. Interrupt Auto Mask - RW */ +#define IGC_ICR 0x01500 /* Intr Cause Read - RC/W1C */ +#define IGC_ICS 0x01504 /* Intr Cause Set - WO */ +#define IGC_IMS 0x01508 /* Intr Mask Set/Read - RW */ +#define IGC_IMC 0x0150C /* Intr Mask Clear - WO */ +#define IGC_IAM 0x01510 /* Intr Ack Auto Mask- RW */ +/* Intr Throttle - RW */ +#define IGC_EITR(_n) (0x01680 + (0x4 * (_n))) +/* Interrupt Vector Allocation - RW */ +#define IGC_IVAR0 0x01700 +#define IGC_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ +#define IGC_GPIE 0x01514 /* General Purpose Intr Enable - RW */ + +/* Interrupt Cause */ +#define IGC_ICRXPTC 0x04104 /* Rx Packet Timer Expire Count */ +#define IGC_ICRXATC 0x04108 /* Rx Absolute Timer Expire Count */ +#define IGC_ICTXPTC 0x0410C /* Tx Packet Timer Expire Count */ +#define IGC_ICTXATC 0x04110 /* Tx Absolute Timer Expire Count */ +#define IGC_ICTXQEC 0x04118 /* Tx Queue Empty Count */ +#define IGC_ICTXQMTC 0x0411C /* Tx Queue Min Threshold Count */ +#define IGC_ICRXDMTC 0x04120 /* Rx Descriptor Min Threshold Count */ +#define IGC_ICRXOC 0x04124 /* Receiver Overrun Count */ + +#define IGC_CBTMPC 0x0402C /* Circuit Breaker TX Packet Count */ +#define IGC_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ +#define IGC_CBRMPC 0x040FC /* Circuit Breaker RX Packet Count */ +#define IGC_RPTHC 0x04104 /* Rx Packets To Host */ +#define IGC_HGPTC 0x04118 /* Host Good Packets TX Count */ +#define IGC_HTCBDPC 0x04124 /* Host TX Circ.Breaker Drop Count */ + +/* MSI-X Table Register Descriptions */ +#define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */ + +/* Receive Register Descriptions */ +#define IGC_RCTL 0x00100 /* Rx Control - RW */ +#define IGC_SRRCTL(_n) (0x0C00C + ((_n) * 0x40)) +#define IGC_PSRTYPE(_i) (0x05480 + ((_i) * 4)) +#define IGC_RDBAL(_n) (0x0C000 + ((_n) * 0x40)) +#define IGC_RDBAH(_n) (0x0C004 + ((_n) * 0x40)) +#define IGC_RDLEN(_n) (0x0C008 + ((_n) * 0x40)) +#define IGC_RDH(_n) (0x0C010 + ((_n) * 0x40)) +#define IGC_RDT(_n) (0x0C018 + ((_n) * 0x40)) +#define IGC_RXDCTL(_n) (0x0C028 + ((_n) * 0x40)) +#define IGC_RQDPC(_n) (0x0C030 + ((_n) * 0x40)) +#define IGC_RXCSUM 0x05000 /* Rx Checksum Control - RW */ +#define IGC_RLPML 0x05004 /* Rx Long Packet Max Length */ +#define IGC_RFCTL 0x05008 /* Receive Filter Control*/ +#define IGC_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define IGC_UTA 0x0A000 /* Unicast Table Array - RW */ +#define IGC_RAL(_n) (0x05400 + ((_n) * 0x08)) +#define IGC_RAH(_n) (0x05404 + ((_n) * 0x08)) + +/* Transmit Register Descriptions */ +#define IGC_TCTL 0x00400 /* Tx Control - RW */ +#define IGC_TIPG 0x00410 /* Tx Inter-packet gap - RW */ +#define IGC_TDBAL(_n) (0x0E000 + ((_n) * 0x40)) +#define IGC_TDBAH(_n) (0x0E004 + ((_n) * 0x40)) +#define IGC_TDLEN(_n) (0x0E008 + ((_n) * 0x40)) +#define IGC_TDH(_n) (0x0E010 + ((_n) * 0x40)) +#define IGC_TDT(_n) (0x0E018 + ((_n) * 0x40)) +#define IGC_TXDCTL(_n) (0x0E028 + ((_n) * 0x40)) + +/* MMD Register Descriptions */ +#define IGC_MMDAC 13 /* MMD Access Control */ +#define IGC_MMDAAD 14 /* MMD Access Address/Data */ + +/* Good transmitted packets counter registers */ +#define IGC_PQGPTC(_n) (0x010014 + (0x100 * (_n))) + +/* Statistics 
Register Descriptions */ +#define IGC_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define IGC_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define IGC_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ +#define IGC_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define IGC_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define IGC_SCC 0x04014 /* Single Collision Count - R/clr */ +#define IGC_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define IGC_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define IGC_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define IGC_COLC 0x04028 /* Collision Count - R/clr */ +#define IGC_DC 0x04030 /* Defer Count - R/clr */ +#define IGC_TNCRS 0x04034 /* Tx-No CRS - R/clr */ +#define IGC_SEC 0x04038 /* Sequence Error Count - R/clr */ +#define IGC_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define IGC_RLEC 0x04040 /* Receive Length Error Count - R/clr */ +#define IGC_XONRXC 0x04048 /* XON Rx Count - R/clr */ +#define IGC_XONTXC 0x0404C /* XON Tx Count - R/clr */ +#define IGC_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ +#define IGC_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ +#define IGC_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ +#define IGC_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ +#define IGC_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ +#define IGC_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ +#define IGC_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ +#define IGC_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ +#define IGC_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ +#define IGC_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ +#define IGC_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ +#define IGC_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ +#define IGC_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ +#define IGC_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ +#define IGC_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ +#define IGC_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ +#define IGC_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ +#define IGC_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ +#define IGC_RUC 0x040A4 /* Rx Undersize Count - R/clr */ +#define IGC_RFC 0x040A8 /* Rx Fragment Count - R/clr */ +#define IGC_ROC 0x040AC /* Rx Oversize Count - R/clr */ +#define IGC_RJC 0x040B0 /* Rx Jabber Count - R/clr */ +#define IGC_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ +#define IGC_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define IGC_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ +#define IGC_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ +#define IGC_TORH 0x040C4 /* Total Octets Rx High - R/clr */ +#define IGC_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ +#define IGC_TOTH 0x040CC /* Total Octets Tx High - R/clr */ +#define IGC_TPR 0x040D0 /* Total Packets Rx - R/clr */ +#define IGC_TPT 0x040D4 /* Total Packets Tx - R/clr */ +#define IGC_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ +#define IGC_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ +#define IGC_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ +#define IGC_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ +#define IGC_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ +#define IGC_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ +#define IGC_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ +#define IGC_BPTC 0x040F4 /* Broadcast 
Packets Tx Count - R/clr */ +#define IGC_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ +#define IGC_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ +#define IGC_IAC 0x04100 /* Interrupt Assertion Count */ +#define IGC_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */ +#define IGC_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */ +#define IGC_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ +#define IGC_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */ +#define IGC_RPTHC 0x04104 /* Rx Packets To Host */ +#define IGC_HGPTC 0x04118 /* Host Good Packets Tx Count */ +#define IGC_RXDMTC 0x04120 /* Rx Descriptor Minimum Threshold Count */ +#define IGC_HGORCL 0x04128 /* Host Good Octets Received Count Low */ +#define IGC_HGORCH 0x0412C /* Host Good Octets Received Count High */ +#define IGC_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ +#define IGC_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ +#define IGC_LENERRS 0x04138 /* Length Errors Count */ +#define IGC_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ +#define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ + +/* Management registers */ +#define IGC_MANC 0x05820 /* Management Control - RW */ + +/* Shadow Ram Write Register - RW */ +#define IGC_SRWR 0x12018 + +/* forward declaration */ +struct igc_hw; +u32 igc_rd32(struct igc_hw *hw, u32 reg); + +/* write operations, indexed using DWORDS */ +#define wr32(reg, val) \ +do { \ + u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ + if (!IGC_REMOVED(hw_addr)) \ + writel((val), &hw_addr[(reg)]); \ +} while (0) + +#define rd32(reg) (igc_rd32(hw, reg)) + +#define wrfl() ((void)rd32(IGC_STATUS)) + +#define array_wr32(reg, offset, value) \ + wr32((reg) + ((offset) << 2), (value)) + +#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2))) + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index eaac26424486..45b108f8f955 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o octeontx2_mbox-y := mbox.o -octeontx2_af-y := cgx.o rvu.o rvu_cgx.o +octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 5328ecc8cea2..352501b54aee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -29,6 +29,7 @@ * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion * @cmd_lock: Lock to serialize the command interface * @resp: command response + * @link_info: link related information * @event_cb: callback for linkchange events * @cmd_pend: flag set before new command is started * flag cleared after command response is received @@ -40,6 +41,7 @@ struct lmac { wait_queue_head_t wq_cmd_cmplt; struct mutex cmd_lock; u64 resp; + struct cgx_link_user_info link_info; struct cgx_event_cb event_cb; bool cmd_pend; struct cgx *cgx; @@ -58,6 +60,12 @@ struct cgx { static LIST_HEAD(cgx_list); +/* Convert firmware speed encoding to user format(Mbps) */ +static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX]; + +/* Convert firmware lmac type encoding to string */ +static char *cgx_lmactype_string[LMAC_MODE_MAX]; + /* Supported devices */ static const struct 
pci_device_id cgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) }, @@ -119,6 +127,177 @@ void *cgx_get_pdata(int cgx_id) } EXPORT_SYMBOL(cgx_get_pdata); +/* Ensure the required lock for event queue(where asynchronous events are + * posted) is acquired before calling this API. Else an asynchronous event(with + * latest link status) can reach the destination before this function returns + * and could make the link status appear wrong. + */ +int cgx_get_link_info(void *cgxd, int lmac_id, + struct cgx_link_user_info *linfo) +{ + struct lmac *lmac = lmac_pdata(lmac_id, cgxd); + + if (!lmac) + return -ENODEV; + + *linfo = lmac->link_info; + return 0; +} +EXPORT_SYMBOL(cgx_get_link_info); + +static u64 mac2u64 (u8 *mac_addr) +{ + u64 mac = 0; + int index; + + for (index = ETH_ALEN - 1; index >= 0; index--) + mac |= ((u64)*mac_addr++) << (8 * index); + return mac; +} + +int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + /* copy 6bytes from macaddr */ + /* memcpy(&cfg, mac_addr, 6); */ + + cfg = mac2u64 (mac_addr); + + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (lmac_id * 0x8)), + cfg | CGX_DMAC_CAM_ADDR_ENABLE | ((u64)lmac_id << 49)); + + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= CGX_DMAC_CTL0_CAM_ENABLE; + cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + return 0; +} +EXPORT_SYMBOL(cgx_lmac_addr_set); + +u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8); + return cfg & CGX_RX_DMAC_ADR_MASK; +} +EXPORT_SYMBOL(cgx_lmac_addr_get); + +static inline u8 cgx_get_lmac_type(struct cgx *cgx, int lmac_id) +{ + u64 cfg; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + return (cfg >> CGX_LMAC_TYPE_SHIFT) & CGX_LMAC_TYPE_MASK; +} + +/* Configure CGX LMAC in internal loopback mode */ +int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u8 lmac_type; + u64 cfg; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + + lmac_type = cgx_get_lmac_type(cgx, lmac_id); + if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); + if (enable) + cfg |= CGXX_GMP_PCS_MRX_CTL_LBK; + else + cfg &= ~CGXX_GMP_PCS_MRX_CTL_LBK; + cgx_write(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SPUX_CONTROL1); + if (enable) + cfg |= CGXX_SPUX_CONTROL1_LBK; + else + cfg &= ~CGXX_SPUX_CONTROL1_LBK; + cgx_write(cgx, lmac_id, CGXX_SPUX_CONTROL1, cfg); + } + return 0; +} +EXPORT_SYMBOL(cgx_lmac_internal_loopback); + +void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) +{ + struct cgx *cgx = cgx_get_pdata(cgx_id); + u64 cfg = 0; + + if (!cgx) + return; + + if (enable) { + /* Enable promiscuous mode on LMAC */ + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg &= ~(CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE); + cfg |= CGX_DMAC_BCAST_MODE; + cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + cfg = cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); + cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + } else { + /* Disable promiscuous mode */ + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE; + cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + cfg = 
cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); + cfg |= CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + } +} +EXPORT_SYMBOL(cgx_lmac_promisc_config); + +int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) +{ + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); + return 0; +} +EXPORT_SYMBOL(cgx_get_rx_stats); + +int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat) +{ + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + *tx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (idx * 8)); + return 0; +} +EXPORT_SYMBOL(cgx_get_tx_stats); + +int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + if (enable) + cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + else + cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); + return 0; +} +EXPORT_SYMBOL(cgx_lmac_rx_tx_enable); + /* CGX Firmware interface low level support */ static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac) { @@ -191,36 +370,79 @@ static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp, return err; } +static inline void cgx_link_usertable_init(void) +{ + cgx_speed_mbps[CGX_LINK_NONE] = 0; + cgx_speed_mbps[CGX_LINK_10M] = 10; + cgx_speed_mbps[CGX_LINK_100M] = 100; + cgx_speed_mbps[CGX_LINK_1G] = 1000; + cgx_speed_mbps[CGX_LINK_2HG] = 2500; + cgx_speed_mbps[CGX_LINK_5G] = 5000; + cgx_speed_mbps[CGX_LINK_10G] = 10000; + cgx_speed_mbps[CGX_LINK_20G] = 20000; + cgx_speed_mbps[CGX_LINK_25G] = 25000; + cgx_speed_mbps[CGX_LINK_40G] = 40000; + cgx_speed_mbps[CGX_LINK_50G] = 50000; + cgx_speed_mbps[CGX_LINK_100G] = 100000; + + cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII"; + cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI"; + cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI"; + cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R"; + cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R"; + cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII"; + cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R"; + cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R"; + cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R"; + cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII"; +} + +static inline void link_status_user_format(u64 lstat, + struct cgx_link_user_info *linfo, + struct cgx *cgx, u8 lmac_id) +{ + char *lmac_string; + + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); + linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); + linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; + linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id); + lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; + strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); +} + /* Hardware event handlers */ static inline void cgx_link_change_handler(u64 lstat, struct lmac *lmac) { + struct cgx_link_user_info *linfo; struct cgx *cgx = lmac->cgx; struct cgx_link_event event; struct device *dev; + int err_type; dev = &cgx->pdev->dev; - event.lstat.link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); - event.lstat.full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); - event.lstat.speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat); - event.lstat.err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat); + link_status_user_format(lstat, 
&event.link_uinfo, cgx, lmac->lmac_id); + err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat); event.cgx_id = cgx->cgx_id; event.lmac_id = lmac->lmac_id; + /* update the local copy of link status */ + lmac->link_info = event.link_uinfo; + linfo = &lmac->link_info; + if (!lmac->event_cb.notify_link_chg) { dev_dbg(dev, "cgx port %d:%d Link change handler null", cgx->cgx_id, lmac->lmac_id); - if (event.lstat.err_type != CGX_ERR_NONE) { + if (err_type != CGX_ERR_NONE) { dev_err(dev, "cgx port %d:%d Link error %d\n", - cgx->cgx_id, lmac->lmac_id, - event.lstat.err_type); + cgx->cgx_id, lmac->lmac_id, err_type); } - dev_info(dev, "cgx port %d:%d Link status %s, speed %x\n", + dev_info(dev, "cgx port %d:%d Link is %s %d Mbps\n", cgx->cgx_id, lmac->lmac_id, - event.lstat.link_up ? "UP" : "DOWN", - event.lstat.speed); + linfo->link_up ? "UP" : "DOWN", linfo->speed); return; } @@ -448,6 +670,8 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) list_add(&cgx->cgx_list, &cgx_list); cgx->cgx_id = cgx_get_cgx_cnt() - 1; + cgx_link_usertable_init(); + err = cgx_lmac_init(cgx); if (err) goto err_release_lmac; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index a2a7a6d72d32..ada25ed0765b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -11,6 +11,7 @@ #ifndef CGX_H #define CGX_H +#include "mbox.h" #include "cgx_fw_if.h" /* PCI device IDs */ @@ -24,14 +25,35 @@ #define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX) /* Registers */ +#define CGXX_CMRX_CFG 0x00 +#define CMR_EN BIT_ULL(55) +#define DATA_PKT_TX_EN BIT_ULL(53) +#define DATA_PKT_RX_EN BIT_ULL(54) +#define CGX_LMAC_TYPE_SHIFT 40 +#define CGX_LMAC_TYPE_MASK 0xF #define CGXX_CMRX_INT 0x040 #define FW_CGX_INT BIT_ULL(1) #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 +#define CGXX_CMRX_RX_STAT0 0x070 #define CGXX_CMRX_RX_LMACS 0x128 +#define CGXX_CMRX_RX_DMAC_CTL0 0x1F8 +#define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) +#define CGX_DMAC_CAM_ACCEPT BIT_ULL(3) +#define CGX_DMAC_MCAST_MODE BIT_ULL(1) +#define CGX_DMAC_BCAST_MODE BIT_ULL(0) +#define CGXX_CMRX_RX_DMAC_CAM0 0x200 +#define CGX_DMAC_CAM_ADDR_ENABLE BIT_ULL(48) +#define CGXX_CMRX_RX_DMAC_CAM1 0x400 +#define CGX_RX_DMAC_ADR_MASK GENMASK_ULL(47, 0) +#define CGXX_CMRX_TX_STAT0 0x700 #define CGXX_SCRATCH0_REG 0x1050 #define CGXX_SCRATCH1_REG 0x1058 #define CGX_CONST 0x2000 +#define CGXX_SPUX_CONTROL1 0x10000 +#define CGXX_SPUX_CONTROL1_LBK BIT_ULL(14) +#define CGXX_GMP_PCS_MRX_CTL 0x30000 +#define CGXX_GMP_PCS_MRX_CTL_LBK BIT_ULL(14) #define CGX_COMMAND_REG CGXX_SCRATCH1_REG #define CGX_EVENT_REG CGXX_SCRATCH0_REG @@ -40,8 +62,22 @@ #define CGX_NVEC 37 #define CGX_LMAC_FWI 0 +enum LMAC_TYPE { + LMAC_MODE_SGMII = 0, + LMAC_MODE_XAUI = 1, + LMAC_MODE_RXAUI = 2, + LMAC_MODE_10G_R = 3, + LMAC_MODE_40G_R = 4, + LMAC_MODE_QSGMII = 6, + LMAC_MODE_25G_R = 7, + LMAC_MODE_50G_R = 8, + LMAC_MODE_100G_R = 9, + LMAC_MODE_USXGMII = 10, + LMAC_MODE_MAX, +}; + struct cgx_link_event { - struct cgx_lnk_sts lstat; + struct cgx_link_user_info link_uinfo; u8 cgx_id; u8 lmac_id; }; @@ -62,4 +98,13 @@ int cgx_get_cgx_cnt(void); int cgx_get_lmac_cnt(void *cgxd); void *cgx_get_pdata(int cgx_id); int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id); +int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat); +int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat); +int cgx_lmac_rx_tx_enable(void *cgxd, int 
lmac_id, bool enable); +int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr); +u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id); +void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable); +int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable); +int cgx_get_link_info(void *cgxd, int lmac_id, + struct cgx_link_user_info *linfo); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h new file mode 100644 index 000000000000..28eb691185f4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef COMMON_H +#define COMMON_H + +#include "rvu_struct.h" + +#define OTX2_ALIGN 128 /* Align to cacheline */ + +#define Q_SIZE_16 0ULL /* 16 entries */ +#define Q_SIZE_64 1ULL /* 64 entries */ +#define Q_SIZE_256 2ULL +#define Q_SIZE_1K 3ULL +#define Q_SIZE_4K 4ULL +#define Q_SIZE_16K 5ULL +#define Q_SIZE_64K 6ULL +#define Q_SIZE_256K 7ULL +#define Q_SIZE_1M 8ULL /* Million entries */ +#define Q_SIZE_MIN Q_SIZE_16 +#define Q_SIZE_MAX Q_SIZE_1M + +#define Q_COUNT(x) (16ULL << (2 * x)) +#define Q_SIZE(x, n) ((ilog2(x) - (n)) / 2) + +/* Admin queue info */ + +/* Since we intend to add only one instruction at a time, + * keep queue size to it's minimum. + */ +#define AQ_SIZE Q_SIZE_16 +/* HW head & tail pointer mask */ +#define AQ_PTR_MASK 0xFFFFF + +struct qmem { + void *base; + dma_addr_t iova; + int alloc_sz; + u8 entry_sz; + u8 align; + u32 qsize; +}; + +static inline int qmem_alloc(struct device *dev, struct qmem **q, + int qsize, int entry_sz) +{ + struct qmem *qmem; + int aligned_addr; + + if (!qsize) + return -EINVAL; + + *q = devm_kzalloc(dev, sizeof(*qmem), GFP_KERNEL); + if (!*q) + return -ENOMEM; + qmem = *q; + + qmem->entry_sz = entry_sz; + qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN; + qmem->base = dma_zalloc_coherent(dev, qmem->alloc_sz, + &qmem->iova, GFP_KERNEL); + if (!qmem->base) + return -ENOMEM; + + qmem->qsize = qsize; + + aligned_addr = ALIGN((u64)qmem->iova, OTX2_ALIGN); + qmem->align = (aligned_addr - qmem->iova); + qmem->base += qmem->align; + qmem->iova += qmem->align; + return 0; +} + +static inline void qmem_free(struct device *dev, struct qmem *qmem) +{ + if (!qmem) + return; + + if (qmem->base) + dma_free_coherent(dev, qmem->alloc_sz, + qmem->base - qmem->align, + qmem->iova - qmem->align); + devm_kfree(dev, qmem); +} + +struct admin_queue { + struct qmem *inst; + struct qmem *res; + spinlock_t lock; /* Serialize inst enqueue from PFs */ +}; + +/* NPA aura count */ +enum npa_aura_sz { + NPA_AURA_SZ_0, + NPA_AURA_SZ_128, + NPA_AURA_SZ_256, + NPA_AURA_SZ_512, + NPA_AURA_SZ_1K, + NPA_AURA_SZ_2K, + NPA_AURA_SZ_4K, + NPA_AURA_SZ_8K, + NPA_AURA_SZ_16K, + NPA_AURA_SZ_32K, + NPA_AURA_SZ_64K, + NPA_AURA_SZ_128K, + NPA_AURA_SZ_256K, + NPA_AURA_SZ_512K, + NPA_AURA_SZ_1M, + NPA_AURA_SZ_MAX, +}; + +#define NPA_AURA_COUNT(x) (1ULL << ((x) + 6)) + +/* NPA AQ result structure for init/read/write of aura HW contexts */ +struct npa_aq_aura_res { + struct npa_aq_res_s res; + struct npa_aura_s aura_ctx; + struct npa_aura_s ctx_mask; +}; + +/* NPA AQ result structure for init/read/write of pool HW contexts */ +struct 
npa_aq_pool_res { + struct npa_aq_res_s res; + struct npa_pool_s pool_ctx; + struct npa_pool_s ctx_mask; +}; + +/* NIX Transmit schedulers */ +enum nix_scheduler { + NIX_TXSCH_LVL_SMQ = 0x0, + NIX_TXSCH_LVL_MDQ = 0x0, + NIX_TXSCH_LVL_TL4 = 0x1, + NIX_TXSCH_LVL_TL3 = 0x2, + NIX_TXSCH_LVL_TL2 = 0x3, + NIX_TXSCH_LVL_TL1 = 0x4, + NIX_TXSCH_LVL_CNT = 0x5, +}; + +/* NIX LSO format indices. + * As of now TSO is the only one using, so statically assigning indices. + */ +#define NIX_LSO_FORMAT_IDX_TSOV4 0 +#define NIX_LSO_FORMAT_IDX_TSOV6 1 + +/* RSS info */ +#define MAX_RSS_GROUPS 8 +/* Group 0 has to be used in default pkt forwarding MCAM entries + * reserved for NIXLFs. Groups 1-7 can be used for RSS for ntuple + * filters. + */ +#define DEFAULT_RSS_CONTEXT_GROUP 0 +#define MAX_RSS_INDIR_TBL_SIZE 256 /* 1 << Max adder bits */ + +#endif /* COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index bedf0ee62450..c339024d04e0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -124,21 +124,50 @@ M(ATTACH_RESOURCES, 0x002, rsrc_attach, msg_rsp) \ M(DETACH_RESOURCES, 0x003, rsrc_detach, msg_rsp) \ M(MSIX_OFFSET, 0x004, msg_req, msix_offset_rsp) \ /* CGX mbox IDs (range 0x200 - 0x3FF) */ \ +M(CGX_START_RXTX, 0x200, msg_req, msg_rsp) \ +M(CGX_STOP_RXTX, 0x201, msg_req, msg_rsp) \ +M(CGX_STATS, 0x202, msg_req, cgx_stats_rsp) \ +M(CGX_MAC_ADDR_SET, 0x203, cgx_mac_addr_set_or_get, \ + cgx_mac_addr_set_or_get) \ +M(CGX_MAC_ADDR_GET, 0x204, cgx_mac_addr_set_or_get, \ + cgx_mac_addr_set_or_get) \ +M(CGX_PROMISC_ENABLE, 0x205, msg_req, msg_rsp) \ +M(CGX_PROMISC_DISABLE, 0x206, msg_req, msg_rsp) \ +M(CGX_START_LINKEVENTS, 0x207, msg_req, msg_rsp) \ +M(CGX_STOP_LINKEVENTS, 0x208, msg_req, msg_rsp) \ +M(CGX_GET_LINKINFO, 0x209, msg_req, cgx_link_info_msg) \ +M(CGX_INTLBK_ENABLE, 0x20A, msg_req, msg_rsp) \ +M(CGX_INTLBK_DISABLE, 0x20B, msg_req, msg_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ +M(NPA_LF_ALLOC, 0x400, npa_lf_alloc_req, npa_lf_alloc_rsp) \ +M(NPA_LF_FREE, 0x401, msg_req, msg_rsp) \ +M(NPA_AQ_ENQ, 0x402, npa_aq_enq_req, npa_aq_enq_rsp) \ +M(NPA_HWCTX_DISABLE, 0x403, hwctx_disable_req, msg_rsp) \ /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \ /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ /* CPT mbox IDs (range 0xA00 - 0xBFF) */ \ /* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ +M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc_req, nix_lf_alloc_rsp) \ +M(NIX_LF_FREE, 0x8001, msg_req, msg_rsp) \ +M(NIX_AQ_ENQ, 0x8002, nix_aq_enq_req, nix_aq_enq_rsp) \ +M(NIX_HWCTX_DISABLE, 0x8003, hwctx_disable_req, msg_rsp) + +/* Messages initiated by AF (range 0xC00 - 0xDFF) */ +#define MBOX_UP_CGX_MESSAGES \ +M(CGX_LINK_EVENT, 0xC00, cgx_link_info_msg, msg_rsp) enum { #define M(_name, _id, _1, _2) MBOX_MSG_ ## _name = _id, MBOX_MESSAGES +MBOX_UP_CGX_MESSAGES #undef M }; /* Mailbox message formats */ +#define RVU_DEFAULT_PF_FUNC 0xFFFF + /* Generic request msg used for those mbox messages which * don't send any data in the request. 
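The M() lists above are X-macros: one table of message IDs that the driver expands several times with different M() definitions (into the MBOX_MSG_* enum here, and into per-message allocators and handlers elsewhere in the series). A standalone illustration, simplified to two parameters instead of the driver's four:

#include <stdio.h>

#define DEMO_MESSAGES \
M(READY,        0x001) \
M(NPA_LF_ALLOC, 0x400) \
M(NIX_LF_ALLOC, 0x8000)

enum {
#define M(_name, _id) MBOX_MSG_ ## _name = _id,
DEMO_MESSAGES
#undef M
};

int main(void)
{
	printf("NIX_LF_ALLOC id = 0x%x\n", MBOX_MSG_NIX_LF_ALLOC);
	return 0;
}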
*/ @@ -208,4 +237,181 @@ struct msix_offset_rsp { u16 cptlf_msixoff[MAX_RVU_BLKLF_CNT]; }; +/* CGX mbox message formats */ + +struct cgx_stats_rsp { + struct mbox_msghdr hdr; +#define CGX_RX_STATS_COUNT 13 +#define CGX_TX_STATS_COUNT 18 + u64 rx_stats[CGX_RX_STATS_COUNT]; + u64 tx_stats[CGX_TX_STATS_COUNT]; +}; + +/* Structure for requesting the operation for + * setting/getting mac address in the CGX interface + */ +struct cgx_mac_addr_set_or_get { + struct mbox_msghdr hdr; + u8 mac_addr[ETH_ALEN]; +}; + +struct cgx_link_user_info { + uint64_t link_up:1; + uint64_t full_duplex:1; + uint64_t lmac_type_id:4; + uint64_t speed:20; /* speed in Mbps */ +#define LMACTYPE_STR_LEN 16 + char lmac_type[LMACTYPE_STR_LEN]; +}; + +struct cgx_link_info_msg { + struct mbox_msghdr hdr; + struct cgx_link_user_info link_info; +}; + +/* NPA mbox message formats */ + +/* NPA mailbox error codes + * Range 301 - 400. + */ +enum npa_af_status { + NPA_AF_ERR_PARAM = -301, + NPA_AF_ERR_AQ_FULL = -302, + NPA_AF_ERR_AQ_ENQUEUE = -303, + NPA_AF_ERR_AF_LF_INVALID = -304, + NPA_AF_ERR_AF_LF_ALLOC = -305, + NPA_AF_ERR_LF_RESET = -306, +}; + +/* For NPA LF context alloc and init */ +struct npa_lf_alloc_req { + struct mbox_msghdr hdr; + int node; + int aura_sz; /* No of auras */ + u32 nr_pools; /* No of pools */ +}; + +struct npa_lf_alloc_rsp { + struct mbox_msghdr hdr; + u32 stack_pg_ptrs; /* No of ptrs per stack page */ + u32 stack_pg_bytes; /* Size of stack page */ + u16 qints; /* NPA_AF_CONST::QINTS */ +}; + +/* NPA AQ enqueue msg */ +struct npa_aq_enq_req { + struct mbox_msghdr hdr; + u32 aura_id; + u8 ctype; + u8 op; + union { + /* Valid when op == WRITE/INIT and ctype == AURA. + * LF fills the pool_id in aura.pool_addr. AF will translate + * the pool_id to pool context pointer. + */ + struct npa_aura_s aura; + /* Valid when op == WRITE/INIT and ctype == POOL */ + struct npa_pool_s pool; + }; + /* Mask data when op == WRITE (1=write, 0=don't write) */ + union { + /* Valid when op == WRITE and ctype == AURA */ + struct npa_aura_s aura_mask; + /* Valid when op == WRITE and ctype == POOL */ + struct npa_pool_s pool_mask; + }; +}; + +struct npa_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + /* Valid when op == READ and ctype == AURA */ + struct npa_aura_s aura; + /* Valid when op == READ and ctype == POOL */ + struct npa_pool_s pool; + }; +}; + +/* Disable all contexts of type 'ctype' */ +struct hwctx_disable_req { + struct mbox_msghdr hdr; + u8 ctype; +}; + +/* NIX mailbox error codes + * Range 401 - 500. 
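The paired context/ctx_mask members in npa_aq_enq_req (and the NIX equivalent below) follow the read-modify-write convention spelled out in the comment: for op == WRITE, a mask bit of 1 takes the bit from the new context, and 0 preserves the current hardware value. Worked on a plain 64-bit word for illustration:

#include <stdint.h>
#include <stdio.h>

static uint64_t masked_write(uint64_t cur, uint64_t val, uint64_t mask)
{
	/* Bits set in mask come from val; everything else is kept. */
	return (cur & ~mask) | (val & mask);
}

int main(void)
{
	uint64_t hw = 0xAAAA5555AAAA5555ULL;

	/* Update only the low 16 bits, leave the rest untouched. */
	printf("0x%016llx\n",
	       (unsigned long long)masked_write(hw, 0x1234, 0xFFFF));
	return 0;
}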
+ */ +enum nix_af_status { + NIX_AF_ERR_PARAM = -401, + NIX_AF_ERR_AQ_FULL = -402, + NIX_AF_ERR_AQ_ENQUEUE = -403, + NIX_AF_ERR_AF_LF_INVALID = -404, + NIX_AF_ERR_AF_LF_ALLOC = -405, + NIX_AF_ERR_TLX_ALLOC_FAIL = -406, + NIX_AF_ERR_TLX_INVALID = -407, + NIX_AF_ERR_RSS_SIZE_INVALID = -408, + NIX_AF_ERR_RSS_GRPS_INVALID = -409, + NIX_AF_ERR_FRS_INVALID = -410, + NIX_AF_ERR_RX_LINK_INVALID = -411, + NIX_AF_INVAL_TXSCHQ_CFG = -412, + NIX_AF_SMQ_FLUSH_FAILED = -413, + NIX_AF_ERR_LF_RESET = -414, +}; + +/* For NIX LF context alloc and init */ +struct nix_lf_alloc_req { + struct mbox_msghdr hdr; + int node; + u32 rq_cnt; /* No of receive queues */ + u32 sq_cnt; /* No of send queues */ + u32 cq_cnt; /* No of completion queues */ + u8 xqe_sz; + u16 rss_sz; + u8 rss_grps; + u16 npa_func; + u16 sso_func; + u64 rx_cfg; /* See NIX_AF_LF(0..127)_RX_CFG */ +}; + +struct nix_lf_alloc_rsp { + struct mbox_msghdr hdr; + u16 sqb_size; + u8 lso_tsov4_idx; + u8 lso_tsov6_idx; + u8 mac_addr[ETH_ALEN]; +}; + +/* NIX AQ enqueue msg */ +struct nix_aq_enq_req { + struct mbox_msghdr hdr; + u32 qidx; + u8 ctype; + u8 op; + union { + struct nix_rq_ctx_s rq; + struct nix_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; + union { + struct nix_rq_ctx_s rq_mask; + struct nix_sq_ctx_s sq_mask; + struct nix_cq_ctx_s cq_mask; + struct nix_rsse_s rss_mask; + struct nix_rx_mce_s mce_mask; + }; +}; + +struct nix_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + struct nix_rq_ctx_s rq; + struct nix_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 2033f42c3226..c06cca9c9e1a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -47,18 +47,18 @@ MODULE_DEVICE_TABLE(pci, rvu_id_table); */ int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero) { + unsigned long timeout = jiffies + usecs_to_jiffies(100); void __iomem *reg; - int timeout = 100; u64 reg_val; reg = rvu->afreg_base + ((block << 28) | offset); - while (timeout) { + while (time_before(jiffies, timeout)) { reg_val = readq(reg); if (zero && !(reg_val & mask)) return 0; if (!zero && (reg_val & mask)) return 0; - usleep_range(1, 2); + usleep_range(1, 5); timeout--; } return -EBUSY; @@ -361,6 +361,19 @@ static void rvu_check_block_implemented(struct rvu *rvu) } } +int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf) +{ + int err; + + if (!block->implemented) + return 0; + + rvu_write64(rvu, block->addr, block->lfreset_reg, lf | BIT_ULL(12)); + err = rvu_poll_reg(rvu, block->addr, block->lfreset_reg, BIT_ULL(12), + true); + return err; +} + static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg) { struct rvu_block *block = &rvu->hw->block[blkaddr]; @@ -552,6 +565,9 @@ static void rvu_free_hw_resources(struct rvu *rvu) int id, max_msix; u64 cfg; + rvu_npa_freemem(rvu); + rvu_nix_freemem(rvu); + /* Free block LF bitmaps */ for (id = 0; id < BLK_COUNT; id++) { block = &hw->block[id]; @@ -755,6 +771,54 @@ init: rvu_scan_block(rvu, block); } + err = rvu_npa_init(rvu); + if (err) + return err; + + err = rvu_nix_init(rvu); + if (err) + return err; + + return 0; +} + +/* NPA and NIX admin queue APIs */ +void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq) +{ + if (!aq) + return; + + qmem_free(rvu->dev, aq->inst); + qmem_free(rvu->dev, 
aq->res); + devm_kfree(rvu->dev, aq); +} + +int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue, + int qsize, int inst_size, int res_size) +{ + struct admin_queue *aq; + int err; + + *ad_queue = devm_kzalloc(rvu->dev, sizeof(*aq), GFP_KERNEL); + if (!*ad_queue) + return -ENOMEM; + aq = *ad_queue; + + /* Alloc memory for instructions i.e AQ */ + err = qmem_alloc(rvu->dev, &aq->inst, qsize, inst_size); + if (err) { + devm_kfree(rvu->dev, aq); + return err; + } + + /* Alloc memory for results */ + err = qmem_alloc(rvu->dev, &aq->res, qsize, res_size); + if (err) { + rvu_aq_free(rvu, aq); + return err; + } + + spin_lock_init(&aq->lock); return 0; } @@ -1316,6 +1380,63 @@ static void rvu_mbox_handler(struct work_struct *work) otx2_mbox_msg_send(mbox, pf); } +static void rvu_mbox_up_handler(struct work_struct *work) +{ + struct rvu_work *mwork = container_of(work, struct rvu_work, work); + struct rvu *rvu = mwork->rvu; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + int offset, id; + u16 pf; + + mbox = &rvu->mbox_up; + pf = mwork - rvu->mbox_wrk_up; + mdev = &mbox->dev[pf]; + + rsp_hdr = mdev->mbase + mbox->rx_start; + if (rsp_hdr->num_msgs == 0) { + dev_warn(rvu->dev, "mbox up handler: num_msgs = 0\n"); + return; + } + + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < rsp_hdr->num_msgs; id++) { + msg = mdev->mbase + offset; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(rvu->dev, + "Mbox msg with unknown ID 0x%x\n", msg->id); + goto end; + } + + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(rvu->dev, + "Mbox msg with wrong signature %x, ID 0x%x\n", + msg->sig, msg->id); + goto end; + } + + switch (msg->id) { + case MBOX_MSG_CGX_LINK_EVENT: + break; + default: + if (msg->rc) + dev_err(rvu->dev, + "Mbox msg response has err %d, ID 0x%x\n", + msg->rc, msg->id); + break; + } +end: + offset = mbox->rx_start + msg->next_msgoff; + mdev->msgs_acked++; + } + + otx2_mbox_reset(mbox, 0); +} + static int rvu_mbox_init(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; @@ -1337,6 +1458,13 @@ static int rvu_mbox_init(struct rvu *rvu) goto exit; } + rvu->mbox_wrk_up = devm_kcalloc(rvu->dev, hw->total_pfs, + sizeof(struct rvu_work), GFP_KERNEL); + if (!rvu->mbox_wrk_up) { + err = -ENOMEM; + goto exit; + } + /* Map mbox region shared with PFs */ bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR); /* Mailbox is a reserved memory (in RAM) region shared between @@ -1355,12 +1483,23 @@ static int rvu_mbox_init(struct rvu *rvu) if (err) goto exit; + err = otx2_mbox_init(&rvu->mbox_up, hwbase, rvu->pdev, rvu->afreg_base, + MBOX_DIR_AFPF_UP, hw->total_pfs); + if (err) + goto exit; + for (pf = 0; pf < hw->total_pfs; pf++) { mwork = &rvu->mbox_wrk[pf]; mwork->rvu = rvu; INIT_WORK(&mwork->work, rvu_mbox_handler); } + for (pf = 0; pf < hw->total_pfs; pf++) { + mwork = &rvu->mbox_wrk_up[pf]; + mwork->rvu = rvu; + INIT_WORK(&mwork->work, rvu_mbox_up_handler); + } + return 0; exit: if (hwbase) @@ -1381,6 +1520,7 @@ static void rvu_mbox_destroy(struct rvu *rvu) iounmap((void __iomem *)rvu->mbox.hwbase); otx2_mbox_destroy(&rvu->mbox); + otx2_mbox_destroy(&rvu->mbox_up); } static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) @@ -1407,6 +1547,12 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) if (hdr->num_msgs) queue_work(rvu->mbox_wq, &rvu->mbox_wrk[pf].work); + mbox = &rvu->mbox_up; + mdev = &mbox->dev[pf]; + hdr = mdev->mbase + mbox->rx_start; + if (hdr->num_msgs) + 
queue_work(rvu->mbox_wq, + &rvu->mbox_wrk_up[pf].work); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d169fa9eb45e..b48b5af83f1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -12,6 +12,7 @@ #define RVU_H #include "rvu_struct.h" +#include "common.h" #include "mbox.h" /* PCI device IDs */ @@ -41,7 +42,8 @@ struct rsrc_bmap { }; struct rvu_block { - struct rsrc_bmap lf; + struct rsrc_bmap lf; + struct admin_queue *aq; /* NIX/NPA AQ */ u16 *fn_map; /* LF to pcifunc mapping */ bool multislot; bool implemented; @@ -70,14 +72,50 @@ struct rvu_pfvf { struct rsrc_bmap msix; /* Bitmap for MSIX vector alloc */ #define MSIX_BLKLF(blkaddr, lf) (((blkaddr) << 8) | ((lf) & 0xFF)) u16 *msix_lfmap; /* Vector to block LF mapping */ + + /* NPA contexts */ + struct qmem *aura_ctx; + struct qmem *pool_ctx; + struct qmem *npa_qints_ctx; + unsigned long *aura_bmap; + unsigned long *pool_bmap; + + /* NIX contexts */ + struct qmem *rq_ctx; + struct qmem *sq_ctx; + struct qmem *cq_ctx; + struct qmem *rss_ctx; + struct qmem *cq_ints_ctx; + struct qmem *nix_qints_ctx; + unsigned long *sq_bmap; + unsigned long *rq_bmap; + unsigned long *cq_bmap; + + u8 mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */ +}; + +struct nix_txsch { + struct rsrc_bmap schq; + u8 lvl; + u16 *pfvf_map; +}; + +struct nix_hw { + struct nix_txsch txsch[NIX_TXSCH_LVL_CNT]; /* Tx schedulers */ }; struct rvu_hwinfo { u8 total_pfs; /* MAX RVU PFs HW supports */ u16 total_vfs; /* Max RVU VFs HW supports */ u16 max_vfs_per_pf; /* Max VFs that can be attached to a PF */ + u8 cgx; + u8 lmac_per_cgx; + u8 cgx_links; + u8 lbk_links; + u8 sdp_links; struct rvu_block block[BLK_COUNT]; /* Block info */ + struct nix_hw *nix0; }; struct rvu { @@ -93,6 +131,8 @@ struct rvu { /* Mbox */ struct otx2_mbox mbox; struct rvu_work *mbox_wrk; + struct otx2_mbox mbox_up; + struct rvu_work *mbox_wrk_up; struct workqueue_struct *mbox_wq; /* MSI-X */ @@ -109,6 +149,7 @@ struct rvu { u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for * every cgx lmac port */ + unsigned long pf_notify_bmap; /* Flags for PF notification */ void **cgx_idmap; /* cgx id to cgx data map table */ struct work_struct cgx_evh_work; struct workqueue_struct *cgx_evh_wq; @@ -149,10 +190,84 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc); void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf); bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr); int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot); +int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc); int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero); +/* NPA/NIX AQ APIs */ +int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue, + int qsize, int inst_size, int res_size); +void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq); + /* CGX APIs */ +static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf) +{ + return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs); +} + +static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id) +{ + *cgx_id = (map >> 4) & 0xF; + *lmac_id = (map & 0xF); +} + int rvu_cgx_probe(struct rvu *rvu); void rvu_cgx_wq_destroy(struct rvu *rvu); +int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start); +int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req, + 
struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp); +int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp); +int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp); +int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_PROMISC_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req, + struct cgx_link_info_msg *rsp); +int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); + +/* NPA APIs */ +int rvu_npa_init(struct rvu *rvu); +void rvu_npa_freemem(struct rvu *rvu); +int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu, + struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp); +int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu, + struct npa_lf_alloc_req *req, + struct npa_lf_alloc_rsp *rsp); +int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); + +/* NIX APIs */ +int rvu_nix_init(struct rvu *rvu); +void rvu_nix_freemem(struct rvu *rvu); +int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu, + struct nix_lf_alloc_req *req, + struct nix_lf_alloc_rsp *rsp); +int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp); +int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp); #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 5ecc22308b22..e0aee2176637 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -20,6 +20,31 @@ struct cgx_evq_entry { struct cgx_link_event link_event; }; +#define M(_name, _id, _req_type, _rsp_type) \ +static struct _req_type __maybe_unused \ +*otx2_mbox_alloc_msg_ ## _name(struct rvu *rvu, int devid) \ +{ \ + struct _req_type *req; \ + \ + req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \ + &rvu->mbox_up, devid, sizeof(struct _req_type), \ + sizeof(struct _rsp_type)); \ + if (!req) \ + return NULL; \ + req->hdr.sig = OTX2_MBOX_REQ_SIG; \ + req->hdr.id = _id; \ + return req; \ +} + +MBOX_UP_CGX_MESSAGES +#undef M + +/* Returns bitmap of mapped PFs */ +static inline u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) +{ + return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id]; +} + static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id) { return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF); @@ -77,6 +102,34 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) return 0; } +static int rvu_cgx_send_link_info(int cgx_id, int 
lmac_id, struct rvu *rvu) +{ + struct cgx_evq_entry *qentry; + unsigned long flags; + int err; + + qentry = kmalloc(sizeof(*qentry), GFP_KERNEL); + if (!qentry) + return -ENOMEM; + + /* Lock the event queue before we read the local link status */ + spin_lock_irqsave(&rvu->cgx_evq_lock, flags); + err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + &qentry->link_event.link_uinfo); + qentry->link_event.cgx_id = cgx_id; + qentry->link_event.lmac_id = lmac_id; + if (err) + goto skip_add; + list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head); +skip_add: + spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags); + + /* start worker to process the events */ + queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work); + + return 0; +} + /* This is called from interrupt context and is expected to be atomic */ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data) { @@ -98,6 +151,41 @@ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data) return 0; } +static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) +{ + struct cgx_link_user_info *linfo; + struct cgx_link_info_msg *msg; + unsigned long pfmap; + int err, pfid; + + linfo = &event->link_uinfo; + pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); + + do { + pfid = find_first_bit(&pfmap, 16); + clear_bit(pfid, &pfmap); + + /* check if notification is enabled */ + if (!test_bit(pfid, &rvu->pf_notify_bmap)) { + dev_info(rvu->dev, "cgx %d: lmac %d Link status %s\n", + event->cgx_id, event->lmac_id, + linfo->link_up ? "UP" : "DOWN"); + continue; + } + + /* Send mbox message to PF */ + msg = otx2_mbox_alloc_msg_CGX_LINK_EVENT(rvu, pfid); + if (!msg) + continue; + msg->link_info = *linfo; + otx2_mbox_msg_send(&rvu->mbox_up, pfid); + err = otx2_mbox_wait_for_rsp(&rvu->mbox_up, pfid); + if (err) + dev_warn(rvu->dev, "notification to pf %d failed\n", + pfid); + } while (pfmap); +} + static void cgx_evhandler_task(struct work_struct *work) { struct rvu *rvu = container_of(work, struct rvu, cgx_evh_work); @@ -119,7 +207,8 @@ static void cgx_evhandler_task(struct work_struct *work) event = &qentry->link_event; - /* Do nothing for now */ + /* process event */ + cgx_notify_pfs(event, rvu); kfree(qentry); } while (1); } @@ -192,3 +281,232 @@ int rvu_cgx_probe(struct rvu *rvu) cgx_lmac_event_handler_init(rvu); return 0; } + +int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
+ */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start); + + return 0; +} + +int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, false); + return 0; +} + +int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + int stat = 0, err = 0; + u64 tx_stat, rx_stat; + u8 cgx_idx, lmac; + void *cgxd; + + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + + /* Rx stats */ + while (stat < CGX_RX_STATS_COUNT) { + err = cgx_get_rx_stats(cgxd, lmac, stat, &rx_stat); + if (err) + return err; + rsp->rx_stats[stat] = rx_stat; + stat++; + } + + /* Tx stats */ + stat = 0; + while (stat < CGX_TX_STATS_COUNT) { + err = cgx_get_tx_stats(cgxd, lmac, stat, &tx_stat); + if (err) + return err; + rsp->tx_stats[stat] = tx_stat; + stat++; + } + return 0; +} + +int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr); + + return 0; +} + +int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + int rc = 0, i; + u64 cfg; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + rsp->hdr.rc = rc; + cfg = cgx_lmac_addr_get(cgx_id, lmac_id); + /* copy 48 bit mac address to rsp->mac_addr */ + for (i = 0; i < ETH_ALEN; i++) + rsp->mac_addr[i] = cfg >> (ETH_ALEN - 1 - i) * 8; + return 0; +} + +int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. 
+ */ + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_promisc_config(cgx_id, lmac_id, false); + return 0; +} + +static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + if (en) { + set_bit(pf, &rvu->pf_notify_bmap); + /* Send the current link status to PF */ + rvu_cgx_send_link_info(cgx_id, lmac_id, rvu); + } else { + clear_bit(pf, &rvu->pf_notify_bmap); + } + + return 0; +} + +int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, false); + return 0; +} + +int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req, + struct cgx_link_info_msg *rsp) +{ + u8 cgx_id, lmac_id; + int pf, err; + + pf = rvu_get_pf(req->hdr.pcifunc); + + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + &rsp->link_info); + return err; +} + +static int rvu_cgx_config_intlbk(struct rvu *rvu, u16 pcifunc, bool en) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + return cgx_lmac_internal_loopback(rvu_cgx_pdata(cgx_id, rvu), + lmac_id, en); +} + +int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, false); + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c new file mode 100644 index 000000000000..214ca2c26ab4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" +#include "cgx.h" + +static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) +{ + if (blkaddr == BLKADDR_NIX0 && hw->nix0) + return hw->nix0; + + return NULL; +} + +static bool is_valid_txschq(struct rvu *rvu, int blkaddr, + int lvl, u16 pcifunc, u16 schq) +{ + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return false; + + txsch = &nix_hw->txsch[lvl]; + /* Check out of bounds */ + if (schq >= txsch->schq.max) + return false; + + spin_lock(&rvu->rsrc_lock); + if (txsch->pfvf_map[schq] != pcifunc) { + spin_unlock(&rvu->rsrc_lock); + return false; + } + spin_unlock(&rvu->rsrc_lock); + return true; +} + +static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr, + u64 format, bool v4, u64 *fidx) +{ + struct nix_lso_format field = {0}; + + /* IP's Length field */ + field.layer = NIX_TXLAYER_OL3; + /* In ipv4, length field is at offset 2 bytes, for ipv6 it's 4 */ + field.offset = v4 ? 2 : 4; + field.sizem1 = 1; /* i.e 2 bytes */ + field.alg = NIX_LSOALG_ADD_PAYLEN; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); + + /* No ID field in IPv6 header */ + if (!v4) + return; + + /* IP's ID field */ + field.layer = NIX_TXLAYER_OL3; + field.offset = 4; + field.sizem1 = 1; /* i.e 2 bytes */ + field.alg = NIX_LSOALG_ADD_SEGNUM; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); +} + +static void nix_setup_lso_tso_l4(struct rvu *rvu, int blkaddr, + u64 format, u64 *fidx) +{ + struct nix_lso_format field = {0}; + + /* TCP's sequence number field */ + field.layer = NIX_TXLAYER_OL4; + field.offset = 4; + field.sizem1 = 3; /* i.e 4 bytes */ + field.alg = NIX_LSOALG_ADD_OFFSET; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); + + /* TCP's flags field */ + field.layer = NIX_TXLAYER_OL4; + field.offset = 12; + field.sizem1 = 0; /* not needed */ + field.alg = NIX_LSOALG_TCP_FLAGS; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); +} + +static void nix_setup_lso(struct rvu *rvu, int blkaddr) +{ + u64 cfg, idx, fidx = 0; + + /* Enable LSO */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LSO_CFG); + /* For TSO, set first and middle segment flags to + * mask out PSH, RST & FIN flags in TCP packet + */ + cfg &= ~((0xFFFFULL << 32) | (0xFFFFULL << 16)); + cfg |= (0xFFF2ULL << 32) | (0xFFF2ULL << 16); + rvu_write64(rvu, blkaddr, NIX_AF_LSO_CFG, cfg | BIT_ULL(63)); + + /* Configure format fields for TCPv4 segmentation offload */ + idx = NIX_LSO_FORMAT_IDX_TSOV4; + nix_setup_lso_tso_l3(rvu, blkaddr, idx, true, &fidx); + nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx); + + /* Set rest of the fields to NOP */ + for (; fidx < 8; fidx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL); + } + + /* Configure format fields for TCPv6 segmentation offload */ + idx = NIX_LSO_FORMAT_IDX_TSOV6; + fidx = 0; + nix_setup_lso_tso_l3(rvu, blkaddr, idx, false, &fidx); + nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx); + + /* Set rest of the fields to NOP */ + for (; fidx < 8; fidx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL); + } +} + +static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf) +{ + kfree(pfvf->rq_bmap); + kfree(pfvf->sq_bmap); + kfree(pfvf->cq_bmap); + if (pfvf->rq_ctx) + 
qmem_free(rvu->dev, pfvf->rq_ctx); + if (pfvf->sq_ctx) + qmem_free(rvu->dev, pfvf->sq_ctx); + if (pfvf->cq_ctx) + qmem_free(rvu->dev, pfvf->cq_ctx); + if (pfvf->rss_ctx) + qmem_free(rvu->dev, pfvf->rss_ctx); + if (pfvf->nix_qints_ctx) + qmem_free(rvu->dev, pfvf->nix_qints_ctx); + if (pfvf->cq_ints_ctx) + qmem_free(rvu->dev, pfvf->cq_ints_ctx); + + pfvf->rq_bmap = NULL; + pfvf->cq_bmap = NULL; + pfvf->sq_bmap = NULL; + pfvf->rq_ctx = NULL; + pfvf->sq_ctx = NULL; + pfvf->cq_ctx = NULL; + pfvf->rss_ctx = NULL; + pfvf->nix_qints_ctx = NULL; + pfvf->cq_ints_ctx = NULL; +} + +static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr, + struct rvu_pfvf *pfvf, int nixlf, + int rss_sz, int rss_grps, int hwctx_size) +{ + int err, grp, num_indices; + + /* RSS is not requested for this NIXLF */ + if (!rss_sz) + return 0; + num_indices = rss_sz * rss_grps; + + /* Alloc NIX RSS HW context memory and config the base */ + err = qmem_alloc(rvu->dev, &pfvf->rss_ctx, num_indices, hwctx_size); + if (err) + return err; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_BASE(nixlf), + (u64)pfvf->rss_ctx->iova); + + /* Config full RSS table size, enable RSS and caching */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf), + BIT_ULL(36) | BIT_ULL(4) | + ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE)); + /* Config RSS group offset and sizes */ + for (grp = 0; grp < rss_grps; grp++) + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp), + ((ilog2(rss_sz) - 1) << 16) | (rss_sz * grp)); + return 0; +} + +static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct nix_aq_inst_s *inst) +{ + struct admin_queue *aq = block->aq; + struct nix_aq_res_s *result; + int timeout = 1000; + u64 reg, head; + + result = (struct nix_aq_res_s *)aq->res->base; + + /* Get current head pointer where to append this instruction */ + reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + + memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)), + (void *)inst, aq->inst->entry_sz); + memset(result, 0, sizeof(*result)); + /* sync into memory */ + wmb(); + + /* Ring the doorbell and wait for result */ + rvu_write64(rvu, block->addr, NIX_AF_AQ_DOOR, 1); + while (result->compcode == NIX_AQ_COMP_NOTDONE) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) + return -EBUSY; + } + + if (result->compcode != NIX_AQ_COMP_GOOD) + /* TODO: Replace this with some error code */ + return -EBUSY; + + return 0; +} + +static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int nixlf, blkaddr, rc = 0; + struct nix_aq_inst_s inst; + struct rvu_block *block; + struct admin_queue *aq; + struct rvu_pfvf *pfvf; + void *ctx, *mask; + bool ena; + u64 cfg; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + aq = block->aq; + if (!aq) { + dev_warn(rvu->dev, "%s: NIX AQ not initialized\n", __func__); + return NIX_AF_ERR_AQ_ENQUEUE; + } + + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + switch (req->ctype) { + case NIX_AQ_CTYPE_RQ: + /* Check if index exceeds max no of queues */ + if (!pfvf->rq_ctx || req->qidx >= pfvf->rq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_SQ: + if (!pfvf->sq_ctx || req->qidx >= pfvf->sq_ctx->qsize) + rc = 
NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_CQ: + if (!pfvf->cq_ctx || req->qidx >= pfvf->cq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_RSS: + /* Check if RSS is enabled and qidx is within range */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf)); + if (!(cfg & BIT_ULL(4)) || !pfvf->rss_ctx || + (req->qidx >= (256UL << (cfg & 0xF)))) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + default: + rc = NIX_AF_ERR_AQ_ENQUEUE; + } + + if (rc) + return rc; + + /* Check if SQ pointed SMQ belongs to this PF/VF or not */ + if (req->ctype == NIX_AQ_CTYPE_SQ && + req->op != NIX_AQ_INSTOP_WRITE) { + if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ, + pcifunc, req->sq.smq)) + return NIX_AF_ERR_AQ_ENQUEUE; + } + + memset(&inst, 0, sizeof(struct nix_aq_inst_s)); + inst.lf = nixlf; + inst.cindex = req->qidx; + inst.ctype = req->ctype; + inst.op = req->op; + /* Currently we are not supporting enqueuing multiple instructions, + * so always choose first entry in result memory. + */ + inst.res_addr = (u64)aq->res->iova; + + /* Clean result + context memory */ + memset(aq->res->base, 0, aq->res->entry_sz); + /* Context needs to be written at RES_ADDR + 128 */ + ctx = aq->res->base + 128; + /* Mask needs to be written at RES_ADDR + 256 */ + mask = aq->res->base + 256; + + switch (req->op) { + case NIX_AQ_INSTOP_WRITE: + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(mask, &req->rq_mask, + sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(mask, &req->sq_mask, + sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(mask, &req->cq_mask, + sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(mask, &req->rss_mask, + sizeof(struct nix_rsse_s)); + /* Fall through */ + case NIX_AQ_INSTOP_INIT: + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s)); + break; + case NIX_AQ_INSTOP_NOP: + case NIX_AQ_INSTOP_READ: + case NIX_AQ_INSTOP_LOCK: + case NIX_AQ_INSTOP_UNLOCK: + break; + default: + rc = NIX_AF_ERR_AQ_ENQUEUE; + return rc; + } + + spin_lock(&aq->lock); + + /* Submit the instruction to AQ */ + rc = nix_aq_enqueue_wait(rvu, block, &inst); + if (rc) { + spin_unlock(&aq->lock); + return rc; + } + + /* Set RQ/SQ/CQ bitmap if respective queue hw context is enabled */ + if (req->op == NIX_AQ_INSTOP_INIT) { + if (req->ctype == NIX_AQ_CTYPE_RQ && req->rq.ena) + __set_bit(req->qidx, pfvf->rq_bmap); + if (req->ctype == NIX_AQ_CTYPE_SQ && req->sq.ena) + __set_bit(req->qidx, pfvf->sq_bmap); + if (req->ctype == NIX_AQ_CTYPE_CQ && req->cq.ena) + __set_bit(req->qidx, pfvf->cq_bmap); + } + + if (req->op == NIX_AQ_INSTOP_WRITE) { + if (req->ctype == NIX_AQ_CTYPE_RQ) { + ena = (req->rq.ena & req->rq_mask.ena) | + (test_bit(req->qidx, pfvf->rq_bmap) & + ~req->rq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->rq_bmap); + else + __clear_bit(req->qidx, pfvf->rq_bmap); + } + if (req->ctype == NIX_AQ_CTYPE_SQ) { + ena = (req->sq.ena & req->sq_mask.ena) | + (test_bit(req->qidx, pfvf->sq_bmap) & + ~req->sq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->sq_bmap); + else + __clear_bit(req->qidx, pfvf->sq_bmap); + } + if (req->ctype == NIX_AQ_CTYPE_CQ) { + ena = (req->cq.ena & req->cq_mask.ena) + 
(test_bit(req->qidx, pfvf->cq_bmap) & + ~req->cq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->cq_bmap); + else + __clear_bit(req->qidx, pfvf->cq_bmap); + } + } + + if (rsp) { + /* Copy read context into mailbox */ + if (req->op == NIX_AQ_INSTOP_READ) { + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(&rsp->rq, ctx, + sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(&rsp->sq, ctx, + sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(&rsp->cq, ctx, + sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(&rsp->rss, ctx, + sizeof(struct nix_rsse_s)); + } + } + + spin_unlock(&aq->lock); + return 0; +} + +static int nix_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct nix_aq_enq_req aq_req; + unsigned long *bmap; + int qidx, q_cnt = 0; + int err = 0, rc; + + if (!pfvf->cq_ctx || !pfvf->sq_ctx || !pfvf->rq_ctx) + return NIX_AF_ERR_AQ_ENQUEUE; + + memset(&aq_req, 0, sizeof(struct nix_aq_enq_req)); + aq_req.hdr.pcifunc = req->hdr.pcifunc; + + if (req->ctype == NIX_AQ_CTYPE_CQ) { + aq_req.cq.ena = 0; + aq_req.cq_mask.ena = 1; + q_cnt = pfvf->cq_ctx->qsize; + bmap = pfvf->cq_bmap; + } + if (req->ctype == NIX_AQ_CTYPE_SQ) { + aq_req.sq.ena = 0; + aq_req.sq_mask.ena = 1; + q_cnt = pfvf->sq_ctx->qsize; + bmap = pfvf->sq_bmap; + } + if (req->ctype == NIX_AQ_CTYPE_RQ) { + aq_req.rq.ena = 0; + aq_req.rq_mask.ena = 1; + q_cnt = pfvf->rq_ctx->qsize; + bmap = pfvf->rq_bmap; + } + + aq_req.ctype = req->ctype; + aq_req.op = NIX_AQ_INSTOP_WRITE; + + for (qidx = 0; qidx < q_cnt; qidx++) { + if (!test_bit(qidx, bmap)) + continue; + aq_req.qidx = qidx; + rc = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL); + if (rc) { + err = rc; + dev_err(rvu->dev, "Failed to disable %s:%d context\n", + (req->ctype == NIX_AQ_CTYPE_CQ) ? + "CQ" : ((req->ctype == NIX_AQ_CTYPE_RQ) ? + "RQ" : "SQ"), qidx); + } + } + + return err; +} + +int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + return rvu_nix_aq_enq_inst(rvu, req, rsp); +} + +int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp) +{ + return nix_lf_hwctx_disable(rvu, req); +} + +int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu, + struct nix_lf_alloc_req *req, + struct nix_lf_alloc_rsp *rsp) +{ + int nixlf, qints, hwctx_size, err, rc = 0; + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + u64 cfg, ctx_cfg; + int blkaddr; + + if (!req->rq_cnt || !req->sq_cnt || !req->cq_cnt) + return NIX_AF_ERR_PARAM; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + /* If RSS is being enabled, check if requested config is valid. + * RSS table size should be power of two, otherwise + * RSS_GRP::OFFSET + adder might go beyond that group or + * won't be able to use entire table. 
+ */ + if (req->rss_sz && (req->rss_sz > MAX_RSS_INDIR_TBL_SIZE || + !is_power_of_2(req->rss_sz))) + return NIX_AF_ERR_RSS_SIZE_INVALID; + + if (req->rss_sz && + (!req->rss_grps || req->rss_grps > MAX_RSS_GROUPS)) + return NIX_AF_ERR_RSS_GRPS_INVALID; + + /* Reset this NIX LF */ + err = rvu_lf_reset(rvu, block, nixlf); + if (err) { + dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n", + block->addr - BLKADDR_NIX0, nixlf); + return NIX_AF_ERR_LF_RESET; + } + + ctx_cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST3); + + /* Alloc NIX RQ HW context memory and config the base */ + hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->rq_ctx, req->rq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->rq_bmap = kcalloc(req->rq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->rq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_BASE(nixlf), + (u64)pfvf->rq_ctx->iova); + + /* Set caching and queue count in HW */ + cfg = BIT_ULL(36) | (req->rq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_CFG(nixlf), cfg); + + /* Alloc NIX SQ HW context memory and config the base */ + hwctx_size = 1UL << (ctx_cfg & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->sq_ctx, req->sq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->sq_bmap = kcalloc(req->sq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->sq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_BASE(nixlf), + (u64)pfvf->sq_ctx->iova); + cfg = BIT_ULL(36) | (req->sq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_CFG(nixlf), cfg); + + /* Alloc NIX CQ HW context memory and config the base */ + hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->cq_ctx, req->cq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->cq_bmap = kcalloc(req->cq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->cq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_BASE(nixlf), + (u64)pfvf->cq_ctx->iova); + cfg = BIT_ULL(36) | (req->cq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_CFG(nixlf), cfg); + + /* Initialize receive side scaling (RSS) */ + hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF); + err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf, + req->rss_sz, req->rss_grps, hwctx_size); + if (err) + goto free_mem; + + /* Alloc memory for CQINT's HW contexts */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + qints = (cfg >> 24) & 0xFFF; + hwctx_size = 1UL << ((ctx_cfg >> 24) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->cq_ints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_BASE(nixlf), + (u64)pfvf->cq_ints_ctx->iova); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_CFG(nixlf), BIT_ULL(36)); + + /* Alloc memory for QINT's HW contexts */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + qints = (cfg >> 12) & 0xFFF; + hwctx_size = 1UL << ((ctx_cfg >> 20) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->nix_qints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_BASE(nixlf), + (u64)pfvf->nix_qints_ctx->iova); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_CFG(nixlf), BIT_ULL(36)); + + /* Enable LMTST for this NIX LF */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG2(nixlf), BIT_ULL(0)); + + /* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC + * If requester has sent a 'RVU_DEFAULT_PF_FUNC' use this NIX LF's + * PCIFUNC itself. 
+ */ + if (req->npa_func == RVU_DEFAULT_PF_FUNC) + cfg = pcifunc; + else + cfg = req->npa_func; + + if (req->sso_func == RVU_DEFAULT_PF_FUNC) + cfg |= (u64)pcifunc << 16; + else + cfg |= (u64)req->sso_func << 16; + + cfg |= (u64)req->xqe_sz << 33; + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf), cfg); + + /* Config Rx pkt length, csum checks and apad enable / disable */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg); + + goto exit; + +free_mem: + nix_ctx_free(rvu, pfvf); + rc = -ENOMEM; + +exit: + /* Set macaddr of this PF/VF */ + ether_addr_copy(rsp->mac_addr, pfvf->mac_addr); + + /* set SQB size info */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQ_CONST); + rsp->sqb_size = (cfg >> 34) & 0xFFFF; + rsp->lso_tsov4_idx = NIX_LSO_FORMAT_IDX_TSOV4; + rsp->lso_tsov6_idx = NIX_LSO_FORMAT_IDX_TSOV6; + return rc; +} + +int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int blkaddr, nixlf, err; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + /* Reset this NIX LF */ + err = rvu_lf_reset(rvu, block, nixlf); + if (err) { + dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n", + block->addr - BLKADDR_NIX0, nixlf); + return NIX_AF_ERR_LF_RESET; + } + + nix_ctx_free(rvu, pfvf); + + return 0; +} + +static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) +{ + struct nix_txsch *txsch; + u64 cfg, reg; + int err, lvl; + + /* Get scheduler queue count of each type and alloc + * bitmap for each for alloc/free/attach operations. + */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + txsch = &nix_hw->txsch[lvl]; + txsch->lvl = lvl; + switch (lvl) { + case NIX_TXSCH_LVL_SMQ: + reg = NIX_AF_MDQ_CONST; + break; + case NIX_TXSCH_LVL_TL4: + reg = NIX_AF_TL4_CONST; + break; + case NIX_TXSCH_LVL_TL3: + reg = NIX_AF_TL3_CONST; + break; + case NIX_TXSCH_LVL_TL2: + reg = NIX_AF_TL2_CONST; + break; + case NIX_TXSCH_LVL_TL1: + reg = NIX_AF_TL1_CONST; + break; + } + cfg = rvu_read64(rvu, blkaddr, reg); + txsch->schq.max = cfg & 0xFFFF; + err = rvu_alloc_bitmap(&txsch->schq); + if (err) + return err; + + /* Allocate memory for scheduler queues to + * PF/VF pcifunc mapping info. 
+ */ + txsch->pfvf_map = devm_kcalloc(rvu->dev, txsch->schq.max, + sizeof(u16), GFP_KERNEL); + if (!txsch->pfvf_map) + return -ENOMEM; + } + return 0; +} + +static int nix_calibrate_x2p(struct rvu *rvu, int blkaddr) +{ + int idx, err; + u64 status; + + /* Start X2P bus calibration */ + rvu_write64(rvu, blkaddr, NIX_AF_CFG, + rvu_read64(rvu, blkaddr, NIX_AF_CFG) | BIT_ULL(9)); + /* Wait for calibration to complete */ + err = rvu_poll_reg(rvu, blkaddr, + NIX_AF_STATUS, BIT_ULL(10), false); + if (err) { + dev_err(rvu->dev, "NIX X2P bus calibration failed\n"); + return err; + } + + status = rvu_read64(rvu, blkaddr, NIX_AF_STATUS); + /* Check if CGX devices are ready */ + for (idx = 0; idx < cgx_get_cgx_cnt(); idx++) { + if (status & (BIT_ULL(16 + idx))) + continue; + dev_err(rvu->dev, + "CGX%d didn't respond to NIX X2P calibration\n", idx); + err = -EBUSY; + } + + /* Check if LBK is ready */ + if (!(status & BIT_ULL(19))) { + dev_err(rvu->dev, + "LBK didn't respond to NIX X2P calibration\n"); + err = -EBUSY; + } + + /* Clear 'calibrate_x2p' bit */ + rvu_write64(rvu, blkaddr, NIX_AF_CFG, + rvu_read64(rvu, blkaddr, NIX_AF_CFG) & ~BIT_ULL(9)); + if (err || (status & 0x3FFULL)) + dev_err(rvu->dev, + "NIX X2P calibration failed, status 0x%llx\n", status); + if (err) + return err; + return 0; +} + +static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + int err; + + /* Set admin queue endianness */ + cfg = rvu_read64(rvu, block->addr, NIX_AF_CFG); +#ifdef __BIG_ENDIAN + cfg |= BIT_ULL(1); + rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg); +#else + cfg &= ~BIT_ULL(1); + rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg); +#endif + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NIX_AF_NDC_CFG); + cfg &= ~0x3FFEULL; + rvu_write64(rvu, block->addr, NIX_AF_NDC_CFG, cfg); + + /* Result structure can be followed by RQ/SQ/CQ context at + * RES + 128bytes and a write mask at RES + 256 bytes, depending on + * operation type. Alloc sufficient result memory for all operations. 
+ */ + err = rvu_aq_alloc(rvu, &block->aq, + Q_COUNT(AQ_SIZE), sizeof(struct nix_aq_inst_s), + ALIGN(sizeof(struct nix_aq_res_s), 128) + 256); + if (err) + return err; + + rvu_write64(rvu, block->addr, NIX_AF_AQ_CFG, AQ_SIZE); + rvu_write64(rvu, block->addr, + NIX_AF_AQ_BASE, (u64)block->aq->inst->iova); + return 0; +} + +int rvu_nix_init(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr, err; + u64 cfg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return 0; + block = &hw->block[blkaddr]; + + /* Calibrate X2P bus to check if CGX/LBK links are fine */ + err = nix_calibrate_x2p(rvu, blkaddr); + if (err) + return err; + + /* Set num of links of each type */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); + hw->cgx = (cfg >> 12) & 0xF; + hw->lmac_per_cgx = (cfg >> 8) & 0xF; + hw->cgx_links = hw->cgx * hw->lmac_per_cgx; + hw->lbk_links = 1; + hw->sdp_links = 1; + + /* Initialize admin queue */ + err = nix_aq_init(rvu, block); + if (err) + return err; + + /* Restore CINT timer delay to HW reset values */ + rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL); + + /* Configure segmentation offload formats */ + nix_setup_lso(rvu, blkaddr); + + if (blkaddr == BLKADDR_NIX0) { + hw->nix0 = devm_kzalloc(rvu->dev, + sizeof(struct nix_hw), GFP_KERNEL); + if (!hw->nix0) + return -ENOMEM; + + err = nix_setup_txschq(rvu, hw->nix0, blkaddr); + if (err) + return err; + } + return 0; +} + +void rvu_nix_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + int blkaddr, lvl; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return; + + block = &hw->block[blkaddr]; + rvu_aq_free(rvu, block->aq); + + if (blkaddr == BLKADDR_NIX0) { + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + txsch = &nix_hw->txsch[lvl]; + kfree(txsch->schq.bmap); + } + } +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c new file mode 100644 index 000000000000..0e43a693d119 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" + +static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct npa_aq_inst_s *inst) +{ + struct admin_queue *aq = block->aq; + struct npa_aq_res_s *result; + int timeout = 1000; + u64 reg, head; + + result = (struct npa_aq_res_s *)aq->res->base; + + /* Get current head pointer where to append this instruction */ + reg = rvu_read64(rvu, block->addr, NPA_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + + memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)), + (void *)inst, aq->inst->entry_sz); + memset(result, 0, sizeof(*result)); + /* sync into memory */ + wmb(); + + /* Ring the doorbell and wait for result */ + rvu_write64(rvu, block->addr, NPA_AF_AQ_DOOR, 1); + while (result->compcode == NPA_AQ_COMP_NOTDONE) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) + return -EBUSY; + } + + if (result->compcode != NPA_AQ_COMP_GOOD) + /* TODO: Replace this with some error code */ + return -EBUSY; + + return 0; +} + +static int rvu_npa_aq_enq_inst(struct rvu *rvu, struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, npalf, rc = 0; + struct npa_aq_inst_s inst; + struct rvu_block *block; + struct admin_queue *aq; + struct rvu_pfvf *pfvf; + void *ctx, *mask; + bool ena; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + if (!pfvf->aura_ctx || req->aura_id >= pfvf->aura_ctx->qsize) + return NPA_AF_ERR_AQ_ENQUEUE; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + aq = block->aq; + if (!aq) { + dev_warn(rvu->dev, "%s: NPA AQ not initialized\n", __func__); + return NPA_AF_ERR_AQ_ENQUEUE; + } + + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + memset(&inst, 0, sizeof(struct npa_aq_inst_s)); + inst.cindex = req->aura_id; + inst.lf = npalf; + inst.ctype = req->ctype; + inst.op = req->op; + /* Currently we are not supporting enqueuing multiple instructions, + * so always choose first entry in result memory. 
+ */ + inst.res_addr = (u64)aq->res->iova; + + /* Clean result + context memory */ + memset(aq->res->base, 0, aq->res->entry_sz); + /* Context needs to be written at RES_ADDR + 128 */ + ctx = aq->res->base + 128; + /* Mask needs to be written at RES_ADDR + 256 */ + mask = aq->res->base + 256; + + switch (req->op) { + case NPA_AQ_INSTOP_WRITE: + /* Copy context and write mask */ + if (req->ctype == NPA_AQ_CTYPE_AURA) { + memcpy(mask, &req->aura_mask, + sizeof(struct npa_aura_s)); + memcpy(ctx, &req->aura, sizeof(struct npa_aura_s)); + } else { + memcpy(mask, &req->pool_mask, + sizeof(struct npa_pool_s)); + memcpy(ctx, &req->pool, sizeof(struct npa_pool_s)); + } + break; + case NPA_AQ_INSTOP_INIT: + if (req->ctype == NPA_AQ_CTYPE_AURA) { + if (req->aura.pool_addr >= pfvf->pool_ctx->qsize) { + rc = NPA_AF_ERR_AQ_FULL; + break; + } + /* Set pool's context address */ + req->aura.pool_addr = pfvf->pool_ctx->iova + + (req->aura.pool_addr * pfvf->pool_ctx->entry_sz); + memcpy(ctx, &req->aura, sizeof(struct npa_aura_s)); + } else { /* POOL's context */ + memcpy(ctx, &req->pool, sizeof(struct npa_pool_s)); + } + break; + case NPA_AQ_INSTOP_NOP: + case NPA_AQ_INSTOP_READ: + case NPA_AQ_INSTOP_LOCK: + case NPA_AQ_INSTOP_UNLOCK: + break; + default: + rc = NPA_AF_ERR_AQ_FULL; + break; + } + + if (rc) + return rc; + + spin_lock(&aq->lock); + + /* Submit the instruction to AQ */ + rc = npa_aq_enqueue_wait(rvu, block, &inst); + if (rc) { + spin_unlock(&aq->lock); + return rc; + } + + /* Set aura bitmap if aura hw context is enabled */ + if (req->ctype == NPA_AQ_CTYPE_AURA) { + if (req->op == NPA_AQ_INSTOP_INIT && req->aura.ena) + __set_bit(req->aura_id, pfvf->aura_bmap); + if (req->op == NPA_AQ_INSTOP_WRITE) { + ena = (req->aura.ena & req->aura_mask.ena) | + (test_bit(req->aura_id, pfvf->aura_bmap) & + ~req->aura_mask.ena); + if (ena) + __set_bit(req->aura_id, pfvf->aura_bmap); + else + __clear_bit(req->aura_id, pfvf->aura_bmap); + } + } + + /* Set pool bitmap if pool hw context is enabled */ + if (req->ctype == NPA_AQ_CTYPE_POOL) { + if (req->op == NPA_AQ_INSTOP_INIT && req->pool.ena) + __set_bit(req->aura_id, pfvf->pool_bmap); + if (req->op == NPA_AQ_INSTOP_WRITE) { + ena = (req->pool.ena & req->pool_mask.ena) | + (test_bit(req->aura_id, pfvf->pool_bmap) & + ~req->pool_mask.ena); + if (ena) + __set_bit(req->aura_id, pfvf->pool_bmap); + else + __clear_bit(req->aura_id, pfvf->pool_bmap); + } + } + spin_unlock(&aq->lock); + + if (rsp) { + /* Copy read context into mailbox */ + if (req->op == NPA_AQ_INSTOP_READ) { + if (req->ctype == NPA_AQ_CTYPE_AURA) + memcpy(&rsp->aura, ctx, + sizeof(struct npa_aura_s)); + else + memcpy(&rsp->pool, ctx, + sizeof(struct npa_pool_s)); + } + } + + return 0; +} + +static int npa_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npa_aq_enq_req aq_req; + unsigned long *bmap; + int id, cnt = 0; + int err = 0, rc; + + if (!pfvf->pool_ctx || !pfvf->aura_ctx) + return NPA_AF_ERR_AQ_ENQUEUE; + + memset(&aq_req, 0, sizeof(struct npa_aq_enq_req)); + aq_req.hdr.pcifunc = req->hdr.pcifunc; + + if (req->ctype == NPA_AQ_CTYPE_POOL) { + aq_req.pool.ena = 0; + aq_req.pool_mask.ena = 1; + cnt = pfvf->pool_ctx->qsize; + bmap = pfvf->pool_bmap; + } else if (req->ctype == NPA_AQ_CTYPE_AURA) { + aq_req.aura.ena = 0; + aq_req.aura_mask.ena = 1; + cnt = pfvf->aura_ctx->qsize; + bmap = pfvf->aura_bmap; + } + + aq_req.ctype = req->ctype; + aq_req.op = NPA_AQ_INSTOP_WRITE; + + for (id = 0; id < cnt; id++) { + if 
(!test_bit(id, bmap)) + continue; + aq_req.aura_id = id; + rc = rvu_npa_aq_enq_inst(rvu, &aq_req, NULL); + if (rc) { + err = rc; + dev_err(rvu->dev, "Failed to disable %s:%d context\n", + (req->ctype == NPA_AQ_CTYPE_AURA) ? + "Aura" : "Pool", id); + } + } + + return err; +} + +int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu, + struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp) +{ + return rvu_npa_aq_enq_inst(rvu, req, rsp); +} + +int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp) +{ + return npa_lf_hwctx_disable(rvu, req); +} + +static void npa_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf) +{ + kfree(pfvf->aura_bmap); + pfvf->aura_bmap = NULL; + + qmem_free(rvu->dev, pfvf->aura_ctx); + pfvf->aura_ctx = NULL; + + kfree(pfvf->pool_bmap); + pfvf->pool_bmap = NULL; + + qmem_free(rvu->dev, pfvf->pool_ctx); + pfvf->pool_ctx = NULL; + + qmem_free(rvu->dev, pfvf->npa_qints_ctx); + pfvf->npa_qints_ctx = NULL; +} + +int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu, + struct npa_lf_alloc_req *req, + struct npa_lf_alloc_rsp *rsp) +{ + int npalf, qints, hwctx_size, err, rc = 0; + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + u64 cfg, ctx_cfg; + int blkaddr; + + if (req->aura_sz > NPA_AURA_SZ_MAX || + req->aura_sz == NPA_AURA_SZ_0 || !req->nr_pools) + return NPA_AF_ERR_PARAM; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Reset this NPA LF */ + err = rvu_lf_reset(rvu, block, npalf); + if (err) { + dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf); + return NPA_AF_ERR_LF_RESET; + } + + ctx_cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST1); + + /* Alloc memory for aura HW contexts */ + hwctx_size = 1UL << (ctx_cfg & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->aura_ctx, + NPA_AURA_COUNT(req->aura_sz), hwctx_size); + if (err) + goto free_mem; + + pfvf->aura_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long), + GFP_KERNEL); + if (!pfvf->aura_bmap) + goto free_mem; + + /* Alloc memory for pool HW contexts */ + hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->pool_ctx, req->nr_pools, hwctx_size); + if (err) + goto free_mem; + + pfvf->pool_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long), + GFP_KERNEL); + if (!pfvf->pool_bmap) + goto free_mem; + + /* Get no of queue interrupts supported */ + cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST); + qints = (cfg >> 28) & 0xFFF; + + /* Alloc memory for Qints HW contexts */ + hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->npa_qints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + cfg = rvu_read64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf)); + /* Clear way partition mask and set aura offset to '0' */ + cfg &= ~(BIT_ULL(34) - 1); + /* Set aura size & enable caching of contexts */ + cfg |= (req->aura_sz << 16) | BIT_ULL(34); + rvu_write64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf), cfg); + + /* Configure aura HW context's base */ + rvu_write64(rvu, blkaddr, NPA_AF_LFX_LOC_AURAS_BASE(npalf), + (u64)pfvf->aura_ctx->iova); + + /* Enable caching of qints hw context */ + rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_CFG(npalf), BIT_ULL(36)); + rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_BASE(npalf), + 
(u64)pfvf->npa_qints_ctx->iova); + + goto exit; + +free_mem: + npa_ctx_free(rvu, pfvf); + rc = -ENOMEM; + +exit: + /* set stack page info */ + cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST); + rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF; + rsp->stack_pg_bytes = cfg & 0xFF; + rsp->qints = (cfg >> 28) & 0xFFF; + return rc; +} + +int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + int npalf, err; + int blkaddr; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Reset this NPA LF */ + err = rvu_lf_reset(rvu, block, npalf); + if (err) { + dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf); + return NPA_AF_ERR_LF_RESET; + } + + npa_ctx_free(rvu, pfvf); + + return 0; +} + +static int npa_aq_init(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + int err; + + /* Set admin queue endianness */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_GEN_CFG); +#ifdef __BIG_ENDIAN + cfg |= BIT_ULL(1); + rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); +#else + cfg &= ~BIT_ULL(1); + rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); +#endif + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); + cfg &= ~0x03DULL; + rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); + + /* Result structure can be followed by Aura/Pool context at + * RES + 128bytes and a write mask at RES + 256 bytes, depending on + * operation type. Alloc sufficient result memory for all operations. 
+ */ + err = rvu_aq_alloc(rvu, &block->aq, + Q_COUNT(AQ_SIZE), sizeof(struct npa_aq_inst_s), + ALIGN(sizeof(struct npa_aq_res_s), 128) + 256); + if (err) + return err; + + rvu_write64(rvu, block->addr, NPA_AF_AQ_CFG, AQ_SIZE); + rvu_write64(rvu, block->addr, + NPA_AF_AQ_BASE, (u64)block->aq->inst->iova); + return 0; +} + +int rvu_npa_init(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr, err; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0); + if (blkaddr < 0) + return 0; + + block = &hw->block[blkaddr]; + + /* Initialize admin queue */ + err = npa_aq_init(rvu, &hw->block[blkaddr]); + if (err) + return err; + + return 0; +} + +void rvu_npa_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0); + if (blkaddr < 0) + return; + + block = &hw->block[blkaddr]; + rvu_aq_free(rvu, block->aq); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 92e05817ffe7..c331b237a26f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -71,4 +71,812 @@ enum rvu_pf_int_vec_e { RVU_PF_INT_VEC_CNT = 0x7, }; +/* NPA admin queue completion enumeration */ +enum npa_aq_comp { + NPA_AQ_COMP_NOTDONE = 0x0, + NPA_AQ_COMP_GOOD = 0x1, + NPA_AQ_COMP_SWERR = 0x2, + NPA_AQ_COMP_CTX_POISON = 0x3, + NPA_AQ_COMP_CTX_FAULT = 0x4, + NPA_AQ_COMP_LOCKERR = 0x5, +}; + +/* NPA admin queue context types */ +enum npa_aq_ctype { + NPA_AQ_CTYPE_AURA = 0x0, + NPA_AQ_CTYPE_POOL = 0x1, +}; + +/* NPA admin queue instruction opcodes */ +enum npa_aq_instop { + NPA_AQ_INSTOP_NOP = 0x0, + NPA_AQ_INSTOP_INIT = 0x1, + NPA_AQ_INSTOP_WRITE = 0x2, + NPA_AQ_INSTOP_READ = 0x3, + NPA_AQ_INSTOP_LOCK = 0x4, + NPA_AQ_INSTOP_UNLOCK = 0x5, +}; + +/* NPA admin queue instruction structure */ +struct npa_aq_inst_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 doneint : 1; /* W0 */ + u64 reserved_44_62 : 19; + u64 cindex : 20; + u64 reserved_17_23 : 7; + u64 lf : 9; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 lf : 9; + u64 reserved_17_23 : 7; + u64 cindex : 20; + u64 reserved_44_62 : 19; + u64 doneint : 1; +#endif + u64 res_addr; /* W1 */ +}; + +/* NPA admin queue result structure */ +struct npa_aq_res_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_17_63 : 47; /* W0 */ + u64 doneint : 1; + u64 compcode : 8; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 compcode : 8; + u64 doneint : 1; + u64 reserved_17_63 : 47; +#endif + u64 reserved_64_127; /* W1 */ +}; + +struct npa_aura_s { + u64 pool_addr; /* W0 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 avg_level : 8; + u64 reserved_118_119 : 2; + u64 shift : 6; + u64 aura_drop : 8; + u64 reserved_98_103 : 6; + u64 bp_ena : 2; + u64 aura_drop_ena : 1; + u64 pool_drop_ena : 1; + u64 reserved_93 : 1; + u64 avg_con : 9; + u64 pool_way_mask : 16; + u64 pool_caching : 1; + u64 reserved_65 : 2; + u64 ena : 1; +#else + u64 ena : 1; + u64 reserved_65 : 2; + u64 pool_caching : 1; + u64 pool_way_mask : 16; + u64 avg_con : 9; + u64 reserved_93 : 1; + u64 pool_drop_ena : 1; + u64 aura_drop_ena : 1; + u64 bp_ena : 2; + u64 reserved_98_103 : 6; + u64 aura_drop : 8; + u64 shift : 6; + u64 reserved_118_119 : 2; + u64 avg_level : 8; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 reserved_189_191 : 3; + u64 nix1_bpid : 9; + u64 reserved_177_179 : 3; 
+ u64 nix0_bpid : 9; + u64 reserved_164_167 : 4; + u64 count : 36; +#else + u64 count : 36; + u64 reserved_164_167 : 4; + u64 nix0_bpid : 9; + u64 reserved_177_179 : 3; + u64 nix1_bpid : 9; + u64 reserved_189_191 : 3; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 reserved_252_255 : 4; + u64 fc_hyst_bits : 4; + u64 fc_stype : 2; + u64 fc_up_crossing : 1; + u64 fc_ena : 1; + u64 reserved_240_243 : 4; + u64 bp : 8; + u64 reserved_228_231 : 4; + u64 limit : 36; +#else + u64 limit : 36; + u64 reserved_228_231 : 4; + u64 bp : 8; + u64 reserved_240_243 : 4; + u64 fc_ena : 1; + u64 fc_up_crossing : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 reserved_252_255 : 4; +#endif + u64 fc_addr; /* W4 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ + u64 reserved_379_383 : 5; + u64 err_qint_idx : 7; + u64 reserved_371 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_363 : 1; + u64 thresh_up : 1; + u64 thresh_int_ena : 1; + u64 thresh_int : 1; + u64 err_int_ena : 8; + u64 err_int : 8; + u64 update_time : 16; + u64 pool_drop : 8; +#else + u64 pool_drop : 8; + u64 update_time : 16; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_363 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_371 : 1; + u64 err_qint_idx : 7; + u64 reserved_379_383 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ + u64 reserved_420_447 : 28; + u64 thresh : 36; +#else + u64 thresh : 36; + u64 reserved_420_447 : 28; +#endif + u64 reserved_448_511; /* W7 */ +}; + +struct npa_pool_s { + u64 stack_base; /* W0 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 reserved_115_127 : 13; + u64 buf_size : 11; + u64 reserved_100_103 : 4; + u64 buf_offset : 12; + u64 stack_way_mask : 16; + u64 reserved_70_71 : 3; + u64 stack_caching : 1; + u64 reserved_66_67 : 2; + u64 nat_align : 1; + u64 ena : 1; +#else + u64 ena : 1; + u64 nat_align : 1; + u64 reserved_66_67 : 2; + u64 stack_caching : 1; + u64 reserved_70_71 : 3; + u64 stack_way_mask : 16; + u64 buf_offset : 12; + u64 reserved_100_103 : 4; + u64 buf_size : 11; + u64 reserved_115_127 : 13; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 stack_pages : 32; + u64 stack_max_pages : 32; +#else + u64 stack_max_pages : 32; + u64 stack_pages : 32; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 reserved_240_255 : 16; + u64 op_pc : 48; +#else + u64 op_pc : 48; + u64 reserved_240_255 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ + u64 reserved_316_319 : 4; + u64 update_time : 16; + u64 reserved_297_299 : 3; + u64 fc_up_crossing : 1; + u64 fc_hyst_bits : 4; + u64 fc_stype : 2; + u64 fc_ena : 1; + u64 avg_con : 9; + u64 avg_level : 8; + u64 reserved_270_271 : 2; + u64 shift : 6; + u64 reserved_260_263 : 4; + u64 stack_offset : 4; +#else + u64 stack_offset : 4; + u64 reserved_260_263 : 4; + u64 shift : 6; + u64 reserved_270_271 : 2; + u64 avg_level : 8; + u64 avg_con : 9; + u64 fc_ena : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 fc_up_crossing : 1; + u64 reserved_297_299 : 3; + u64 update_time : 16; + u64 reserved_316_319 : 4; +#endif + u64 fc_addr; /* W5 */ + u64 ptr_start; /* W6 */ + u64 ptr_end; /* W7 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 reserved_571_575 : 5; + u64 err_qint_idx : 7; + u64 reserved_563 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_555 : 1; + u64 thresh_up : 1; + u64 thresh_int_ena : 1; + u64 thresh_int : 1; + u64 err_int_ena : 8; + u64 err_int : 8; + u64 reserved_512_535 : 24; +#else + u64 reserved_512_535 : 24; + u64 err_int : 
8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_555 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_563 : 1; + u64 err_qint_idx : 7; + u64 reserved_571_575 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 reserved_612_639 : 28; + u64 thresh : 36; +#else + u64 thresh : 36; + u64 reserved_612_639 : 28; +#endif + u64 reserved_640_703; /* W10 */ + u64 reserved_704_767; /* W11 */ + u64 reserved_768_831; /* W12 */ + u64 reserved_832_895; /* W13 */ + u64 reserved_896_959; /* W14 */ + u64 reserved_960_1023; /* W15 */ +}; + +/* NIX admin queue completion status */ +enum nix_aq_comp { + NIX_AQ_COMP_NOTDONE = 0x0, + NIX_AQ_COMP_GOOD = 0x1, + NIX_AQ_COMP_SWERR = 0x2, + NIX_AQ_COMP_CTX_POISON = 0x3, + NIX_AQ_COMP_CTX_FAULT = 0x4, + NIX_AQ_COMP_LOCKERR = 0x5, + NIX_AQ_COMP_SQB_ALLOC_FAIL = 0x6, +}; + +/* NIX admin queue context types */ +enum nix_aq_ctype { + NIX_AQ_CTYPE_RQ = 0x0, + NIX_AQ_CTYPE_SQ = 0x1, + NIX_AQ_CTYPE_CQ = 0x2, + NIX_AQ_CTYPE_MCE = 0x3, + NIX_AQ_CTYPE_RSS = 0x4, + NIX_AQ_CTYPE_DYNO = 0x5, +}; + +/* NIX admin queue instruction opcodes */ +enum nix_aq_instop { + NIX_AQ_INSTOP_NOP = 0x0, + NIX_AQ_INSTOP_INIT = 0x1, + NIX_AQ_INSTOP_WRITE = 0x2, + NIX_AQ_INSTOP_READ = 0x3, + NIX_AQ_INSTOP_LOCK = 0x4, + NIX_AQ_INSTOP_UNLOCK = 0x5, +}; + +/* NIX admin queue instruction structure */ +struct nix_aq_inst_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 doneint : 1; /* W0 */ + u64 reserved_44_62 : 19; + u64 cindex : 20; + u64 reserved_15_23 : 9; + u64 lf : 7; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 lf : 7; + u64 reserved_15_23 : 9; + u64 cindex : 20; + u64 reserved_44_62 : 19; + u64 doneint : 1; +#endif + u64 res_addr; /* W1 */ +}; + +/* NIX admin queue result structure */ +struct nix_aq_res_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_17_63 : 47; /* W0 */ + u64 doneint : 1; + u64 compcode : 8; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 compcode : 8; + u64 doneint : 1; + u64 reserved_17_63 : 47; +#endif + u64 reserved_64_127; /* W1 */ +}; + +/* NIX Completion queue context structure */ +struct nix_cq_ctx_s { + u64 base; +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 wrptr : 20; + u64 avg_con : 9; + u64 cint_idx : 7; + u64 cq_err : 1; + u64 qint_idx : 7; + u64 rsvd_81_83 : 3; + u64 bpid : 9; + u64 rsvd_69_71 : 3; + u64 bp_ena : 1; + u64 rsvd_64_67 : 4; +#else + u64 rsvd_64_67 : 4; + u64 bp_ena : 1; + u64 rsvd_69_71 : 3; + u64 bpid : 9; + u64 rsvd_81_83 : 3; + u64 qint_idx : 7; + u64 cq_err : 1; + u64 cint_idx : 7; + u64 avg_con : 9; + u64 wrptr : 20; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 update_time : 16; + u64 avg_level : 8; + u64 head : 20; + u64 tail : 20; +#else + u64 tail : 20; + u64 head : 20; + u64 avg_level : 8; + u64 update_time : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 cq_err_int_ena : 8; + u64 cq_err_int : 8; + u64 qsize : 4; + u64 rsvd_233_235 : 3; + u64 caching : 1; + u64 substream : 20; + u64 rsvd_210_211 : 2; + u64 ena : 1; + u64 drop_ena : 1; + u64 drop : 8; + u64 dp : 8; +#else + u64 dp : 8; + u64 drop : 8; + u64 drop_ena : 1; + u64 ena : 1; + u64 rsvd_210_211 : 2; + u64 substream : 20; + u64 caching : 1; + u64 rsvd_233_235 : 3; + u64 qsize : 4; + u64 cq_err_int : 8; + u64 cq_err_int_ena : 8; +#endif +}; + +/* NIX Receive queue context structure */ +struct nix_rq_ctx_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + u64 wqe_aura : 20; + u64 substream : 20; + u64 cq : 20; + u64 
ena_wqwd : 1; + u64 ipsech_ena : 1; + u64 sso_ena : 1; + u64 ena : 1; +#else + u64 ena : 1; + u64 sso_ena : 1; + u64 ipsech_ena : 1; + u64 ena_wqwd : 1; + u64 cq : 20; + u64 substream : 20; + u64 wqe_aura : 20; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 rsvd_127_122 : 6; + u64 lpb_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 xqe_drop_ena : 1; + u64 wqe_caching : 1; + u64 pb_caching : 2; + u64 sso_tt : 2; + u64 sso_grp : 10; + u64 lpb_aura : 20; + u64 spb_aura : 20; +#else + u64 spb_aura : 20; + u64 lpb_aura : 20; + u64 sso_grp : 10; + u64 sso_tt : 2; + u64 pb_caching : 2; + u64 wqe_caching : 1; + u64 xqe_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 lpb_drop_ena : 1; + u64 rsvd_127_122 : 6; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 xqe_hdr_split : 1; + u64 xqe_imm_copy : 1; + u64 rsvd_189_184 : 6; + u64 xqe_imm_size : 6; + u64 later_skip : 6; + u64 rsvd_171 : 1; + u64 first_skip : 7; + u64 lpb_sizem1 : 12; + u64 spb_ena : 1; + u64 rsvd_150_148 : 3; + u64 wqe_skip : 2; + u64 spb_sizem1 : 6; + u64 rsvd_139_128 : 12; +#else + u64 rsvd_139_128 : 12; + u64 spb_sizem1 : 6; + u64 wqe_skip : 2; + u64 rsvd_150_148 : 3; + u64 spb_ena : 1; + u64 lpb_sizem1 : 12; + u64 first_skip : 7; + u64 rsvd_171 : 1; + u64 later_skip : 6; + u64 xqe_imm_size : 6; + u64 rsvd_189_184 : 6; + u64 xqe_imm_copy : 1; + u64 xqe_hdr_split : 1; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 spb_pool_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_aura_drop : 8; + u64 wqe_pool_pass : 8; + u64 wqe_pool_drop : 8; + u64 xqe_pass : 8; + u64 xqe_drop : 8; +#else + u64 xqe_drop : 8; + u64 xqe_pass : 8; + u64 wqe_pool_drop : 8; + u64 wqe_pool_pass : 8; + u64 spb_aura_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_pool_pass : 8; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ + u64 rsvd_319_315 : 5; + u64 qint_idx : 7; + u64 rq_int_ena : 8; + u64 rq_int : 8; + u64 rsvd_291_288 : 4; + u64 lpb_pool_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_aura_drop : 8; +#else + u64 lpb_aura_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_pool_pass : 8; + u64 rsvd_291_288 : 4; + u64 rq_int : 8; + u64 rq_int_ena : 8; + u64 qint_idx : 7; + u64 rsvd_319_315 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ + u64 rsvd_383_366 : 18; + u64 flow_tagw : 6; + u64 bad_utag : 8; + u64 good_utag : 8; + u64 ltag : 24; +#else + u64 ltag : 24; + u64 good_utag : 8; + u64 bad_utag : 8; + u64 flow_tagw : 6; + u64 rsvd_383_366 : 18; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ + u64 rsvd_447_432 : 16; + u64 octs : 48; +#else + u64 octs : 48; + u64 rsvd_447_432 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W7 */ + u64 rsvd_511_496 : 16; + u64 pkts : 48; +#else + u64 pkts : 48; + u64 rsvd_511_496 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 rsvd_575_560 : 16; + u64 drop_octs : 48; +#else + u64 drop_octs : 48; + u64 rsvd_575_560 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 rsvd_639_624 : 16; + u64 drop_pkts : 48; +#else + u64 drop_pkts : 48; + u64 rsvd_639_624 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 rsvd_703_688 : 16; + u64 re_pkts : 48; +#else + u64 re_pkts : 48; + u64 rsvd_703_688 : 16; +#endif + u64 rsvd_767_704; /* W11 */ + u64 rsvd_831_768; /* W12 */ + u64 rsvd_895_832; /* W13 */ + u64 rsvd_959_896; /* W14 */ + u64 rsvd_1023_960; /* W15 */ +}; + +/* NIX sqe sizes */ +enum nix_maxsqesz { + NIX_MAXSQESZ_W16 = 0x0, + NIX_MAXSQESZ_W8 = 0x1, +}; 
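The NPA and NIX context structures above all follow one layout convention: every 64-bit hardware word is declared twice, once per bitfield endianness, with the field order mirrored between the two branches. A minimal standalone sketch of that idiom follows; it is illustrative only, and demo_word_s and its fields are hypothetical names, not part of the driver. Little-endian ABIs allocate bitfields starting at the least significant bit and big-endian ABIs at the most significant bit, so mirroring the order keeps each field at the same bit position in the word the device sees:

#include <linux/types.h>	/* u64 */
#include <asm/byteorder.h>	/* defines __BIG_ENDIAN_BITFIELD or __LITTLE_ENDIAN_BITFIELD */

/* Hypothetical 64-bit hardware word with "ena" at bit 0 and "count"
 * at bits 4..39. Both declarations describe the same in-memory word;
 * only the compiler's bitfield allocation order differs.
 */
struct demo_word_s {
#if defined(__BIG_ENDIAN_BITFIELD)
	u64 reserved_40_63	: 24;
	u64 count		: 36;
	u64 reserved_1_3	: 3;
	u64 ena			: 1;
#else
	u64 ena			: 1;
	u64 reserved_1_3	: 3;
	u64 count		: 36;
	u64 reserved_40_63	: 24;
#endif
};

Because reserved fields pad every word to exactly 64 bits, no field straddles a word boundary, and the W0..W15 comments in the structures above line up with successive u64 words in memory.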
+ +/* NIX SQB caching type */ +enum nix_stype { + NIX_STYPE_STF = 0x0, + NIX_STYPE_STT = 0x1, + NIX_STYPE_STP = 0x2, +}; + +/* NIX Send queue context structure */ +struct nix_sq_ctx_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + u64 sqe_way_mask : 16; + u64 cq : 20; + u64 sdp_mcast : 1; + u64 substream : 20; + u64 qint_idx : 6; + u64 ena : 1; +#else + u64 ena : 1; + u64 qint_idx : 6; + u64 substream : 20; + u64 sdp_mcast : 1; + u64 cq : 20; + u64 sqe_way_mask : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 sqb_count : 16; + u64 default_chan : 12; + u64 smq_rr_quantum : 24; + u64 sso_ena : 1; + u64 xoff : 1; + u64 cq_ena : 1; + u64 smq : 9; +#else + u64 smq : 9; + u64 cq_ena : 1; + u64 xoff : 1; + u64 sso_ena : 1; + u64 smq_rr_quantum : 24; + u64 default_chan : 12; + u64 sqb_count : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 rsvd_191 : 1; + u64 sqe_stype : 2; + u64 sq_int_ena : 8; + u64 sq_int : 8; + u64 sqb_aura : 20; + u64 smq_rr_count : 25; +#else + u64 smq_rr_count : 25; + u64 sqb_aura : 20; + u64 sq_int : 8; + u64 sq_int_ena : 8; + u64 sqe_stype : 2; + u64 rsvd_191 : 1; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 rsvd_255_253 : 3; + u64 smq_next_sq_vld : 1; + u64 smq_pend : 1; + u64 smenq_next_sqb_vld : 1; + u64 head_offset : 6; + u64 smenq_offset : 6; + u64 tail_offset : 6; + u64 smq_lso_segnum : 8; + u64 smq_next_sq : 20; + u64 mnq_dis : 1; + u64 lmt_dis : 1; + u64 cq_limit : 8; + u64 max_sqe_size : 2; +#else + u64 max_sqe_size : 2; + u64 cq_limit : 8; + u64 lmt_dis : 1; + u64 mnq_dis : 1; + u64 smq_next_sq : 20; + u64 smq_lso_segnum : 8; + u64 tail_offset : 6; + u64 smenq_offset : 6; + u64 head_offset : 6; + u64 smenq_next_sqb_vld : 1; + u64 smq_pend : 1; + u64 smq_next_sq_vld : 1; + u64 rsvd_255_253 : 3; +#endif + u64 next_sqb : 64;/* W4 */ + u64 tail_sqb : 64;/* W5 */ + u64 smenq_sqb : 64;/* W6 */ + u64 smenq_next_sqb : 64;/* W7 */ + u64 head_sqb : 64;/* W8 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 rsvd_639_630 : 10; + u64 vfi_lso_vld : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_mps : 14; + u64 vfi_lso_sb : 8; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_total : 18; + u64 rsvd_583_576 : 8; +#else + u64 rsvd_583_576 : 8; + u64 vfi_lso_total : 18; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_sb : 8; + u64 vfi_lso_mps : 14; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vld : 1; + u64 rsvd_639_630 : 10; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 rsvd_703_658 : 46; + u64 scm_lso_rem : 18; +#else + u64 scm_lso_rem : 18; + u64 rsvd_703_658 : 46; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W11 */ + u64 rsvd_767_752 : 16; + u64 octs : 48; +#else + u64 octs : 48; + u64 rsvd_767_752 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W12 */ + u64 rsvd_831_816 : 16; + u64 pkts : 48; +#else + u64 pkts : 48; + u64 rsvd_831_816 : 16; +#endif + u64 rsvd_895_832 : 64;/* W13 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W14 */ + u64 rsvd_959_944 : 16; + u64 dropped_octs : 48; +#else + u64 dropped_octs : 48; + u64 rsvd_959_944 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W15 */ + u64 rsvd_1023_1008 : 16; + u64 dropped_pkts : 48; +#else + u64 dropped_pkts : 48; + u64 rsvd_1023_1008 : 16; +#endif +}; + +/* NIX Receive side scaling entry structure*/ +struct nix_rsse_s { +#if defined(__BIG_ENDIAN_BITFIELD) + uint32_t reserved_20_31 : 12; + uint32_t rq : 20; +#else + uint32_t rq : 20; + uint32_t reserved_20_31 : 12; + +#endif +}; + +/* NIX receive 
multicast/mirror entry structure */ +struct nix_rx_mce_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + uint64_t next : 16; + uint64_t pf_func : 16; + uint64_t rsvd_31_24 : 8; + uint64_t index : 20; + uint64_t eol : 1; + uint64_t rsvd_2 : 1; + uint64_t op : 2; +#else + uint64_t op : 2; + uint64_t rsvd_2 : 1; + uint64_t eol : 1; + uint64_t index : 20; + uint64_t rsvd_31_24 : 8; + uint64_t pf_func : 16; + uint64_t next : 16; +#endif +}; + +enum nix_lsoalg { + NIX_LSOALG_NOP, + NIX_LSOALG_ADD_SEGNUM, + NIX_LSOALG_ADD_PAYLEN, + NIX_LSOALG_ADD_OFFSET, + NIX_LSOALG_TCP_FLAGS, +}; + +enum nix_txlayer { + NIX_TXLAYER_OL3, + NIX_TXLAYER_OL4, + NIX_TXLAYER_IL3, + NIX_TXLAYER_IL4, +}; + +struct nix_lso_format { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd_19_63 : 45; + u64 alg : 3; + u64 rsvd_14_15 : 2; + u64 sizem1 : 2; + u64 rsvd_10_11 : 2; + u64 layer : 2; + u64 offset : 8; +#else + u64 offset : 8; + u64 layer : 2; + u64 rsvd_10_11 : 2; + u64 sizem1 : 2; + u64 rsvd_14_15 : 2; + u64 alg : 3; + u64 rsvd_19_63 : 45; +#endif +}; + #endif /* RVU_STRUCT_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a53736c26c0c..a5a0823e5ada 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -308,10 +308,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: - case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_DEALLOC_MEMIC: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -426,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_CREATE_QP: case MLX5_CMD_OP_FPGA_MODIFY_QP: @@ -435,6 +436,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_ALLOC_MEMIC: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -599,8 +601,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); - MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); - MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); @@ -617,6 +619,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 
a4179122a279..4b85abb5c9f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cons_index = 0; cq->arm_sn = 0; cq->eq = eq; + cq->uid = MLX5_GET(create_cq_in, in, uid); refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) @@ -144,6 +145,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, din, uid, cq->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + MLX5_SET(modify_cq_in, in, uid, cq->uid); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 0240aee9189e..d027ce00c8ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ - {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ @@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule, memcpy(__entry->destination, &rule->dest_attr, sizeof(__entry->destination)); - if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && - rule->dest_attr.counter) + if (rule->dest_attr.type & + MLX5_FLOW_DESTINATION_TYPE_COUNTER) __entry->counter_id = - rule->dest_attr.counter->id; + rule->dest_attr.counter_id; ), TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", __entry->rule, __entry->fte, __entry->index, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 24e3b564964f..023dc4bccd28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -235,3 +235,211 @@ out: kfree(out); return err; } + +static u32 fec_supported_speeds[] = { + 10000, + 40000, + 25000, + 50000, + 56000, + 100000 +}; + +#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds) + +/* get/set FEC admin field for a given speed */ +static int mlx5e_fec_admin_field(u32 *pplm, + u8 *fec_policy, + bool write, + u32 speed) +{ + switch (speed) { + case 10000: + case 40000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_10g_40g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_10g_40g, *fec_policy); + break; + case 25000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_25g); + else + MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_25g, *fec_policy); + break; + case 50000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_50g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_50g, *fec_policy); + break; + case 56000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_56g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_56g, *fec_policy); + break; + case 100000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_100g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_100g, *fec_policy); + break; + default: + return -EINVAL; + } + return 0; +} + +/* returns FEC capabilities for a given speed */ +static int mlx5e_get_fec_cap_field(u32 *pplm, + u8 *fec_cap, + u32 speed) +{ + switch (speed) { + case 10000: + case 40000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_10g_40g); + break; + case 25000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_25g); + break; + case 50000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_50g); + break; + case 56000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_56g); + break; + case 100000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_100g); + break; + default: + return -EINVAL; + } + return 0; +} + +int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) +{ + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 current_fec_speed; + int err; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + err = mlx5e_port_linkspeed(dev, &current_fec_speed); + if (err) + return err; + + return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed); +} + +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u8 *fec_configured_mode) +{ + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 link_speed; + int err; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); + + if (!fec_configured_mode) + return 0; + + err = mlx5e_port_linkspeed(dev, &link_speed); + if (err) + return err; + + return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed); +} + +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) +{ + bool fec_mode_not_supp_in_speed = false; + u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 current_fec_speed; + u8 fec_caps = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + err = mlx5e_port_linkspeed(dev, &current_fec_speed); + if (err) + return err; + + memset(in, 0, sz); + MLX5_SET(pplm_reg, in, local_port, 1); + for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) { + mlx5e_get_fec_cap_field(out, &fec_caps,
fec_supported_speeds[i]); + /* policy supported for link speed */ + if (!!(fec_caps & fec_policy)) { + mlx5e_fec_admin_field(in, &fec_policy, 1, + fec_supported_speeds[i]); + } else { + if (fec_supported_speeds[i] == current_fec_speed) + return -EOPNOTSUPP; + mlx5e_fec_admin_field(in, &no_fec_policy, 1, + fec_supported_speeds[i]); + fec_mode_not_supp_in_speed = true; + } + } + + if (fec_mode_not_supp_in_speed) + mlx5_core_dbg(dev, + "FEC policy 0x%x is not supported for some speeds", + fec_policy); + + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index f8cbd8194179..cd2160b8c9bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -45,4 +45,16 @@ int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); + +int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps); +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u8 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy); + +enum { + MLX5E_FEC_NOFEC, + MLX5E_FEC_FIRECODE, + MLX5E_FEC_RS_528_514, +}; + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index db3278cc052b..3078491cc0d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c61f6302cde2..3e770abfd802 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -547,6 +547,70 @@ static void ptys2ethtool_adver_link(unsigned long *advertising_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static const u32 pplm_fec_2_ethtool[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, + [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, + [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, +}; + +static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) +{ + int mode = 0; + + if (!fec_mode) + return ETHTOOL_FEC_AUTO; + + mode = find_first_bit(&fec_mode, size); + + if (mode < ARRAY_SIZE(pplm_fec_2_ethtool)) + return pplm_fec_2_ethtool[mode]; + + return 0; +} + +/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */ +static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code) +{ + u32 offset; + + offset = find_first_bit(&ethtool_fec_code, sizeof(u32)); + offset -= ETHTOOL_FEC_OFF_BIT; + offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT; + + return offset; +} + +static int get_fec_supported_advertised(struct mlx5_core_dev *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + u_long fec_caps = 0; + u32 active_fec = 0; + u32 offset; + u32 bitn; + int err; + + err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps); + if (err) + return (err == -EOPNOTSUPP) ?
0 : err; + + err = mlx5e_get_fec_mode(dev, &active_fec, NULL); + if (err) + return err; + + for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) { + u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn]; + + offset = ethtool_fec2ethtool_caps(ethtool_bitmask); + __set_bit(offset, link_ksettings->link_modes.supported); + } + + active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE); + offset = ethtool_fec2ethtool_caps(active_fec); + __set_bit(offset, link_ksettings->link_modes.advertising); + + return 0; +} + static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, u32 eth_proto_cap, u8 connector_type) @@ -742,7 +806,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", __func__, err); - goto err_query_ptys; + goto err_query_regs; } eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); @@ -778,11 +842,17 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, AUTONEG_ENABLE; ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); + + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) + netdev_dbg(netdev, "%s: FEC caps query failed: %d\n", + __func__, err); + if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Autoneg); -err_query_ptys: +err_query_regs: return err; } @@ -1277,6 +1347,58 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 fec_configured = 0; + u32 fec_active = 0; + int err; + + err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured); + + if (err) + return err; + + fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active, + sizeof(u32) * BITS_PER_BYTE); + + if (!fecparam->active_fec) + return -EOPNOTSUPP; + + fecparam->fec = pplm2ethtool_fec((u_long)fec_configured, + sizeof(u8) * BITS_PER_BYTE); + + return 0; +} + +static int mlx5e_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 fec_policy = 0; + int mode; + int err; + + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { + if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) + continue; + fec_policy |= (1 << mode); + break; + } + + err = mlx5e_set_fec_mode(mdev, fec_policy); + + if (err) + return err; + + mlx5_toggle_port_link(mdev); + + return 0; +} + static u32 mlx5e_get_msglevel(struct net_device *dev) { return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; @@ -1699,4 +1821,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .self_test = mlx5e_self_test, .get_msglevel = mlx5e_get_msglevel, .set_msglevel = mlx5e_set_msglevel, + .get_fecparam = mlx5e_get_fecparam, + .set_fecparam = mlx5e_set_fecparam, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c9848e333450..1243edbedc9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3392,9 +3392,6 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return 
-EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 64c2b9ea8b1e..c3c657548824 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -853,9 +853,6 @@ static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); @@ -869,9 +866,6 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f19067c94272..2f7fb8de6967 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -433,10 +433,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 frag_pi) + u16 pi, u16 nnops) { struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi]; - u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; edge_wi = wi + nnops; @@ -455,15 +454,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); - u16 pi, frag_pi; + u16 pi, contig_wqebbs_room; int err; int i; pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - - if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) { - mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { + mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 1c006869a642..1e55b9c27ffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -614,46 +614,82 @@ static const struct counter_desc pport_phy_statistical_stats_desc[] = { { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, }; -#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc) +static const struct counter_desc +pport_phy_statistical_err_lanes_stats_desc[] = { + { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) }, + { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) }, + { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) }, + { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, +}; + +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) static int 
mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + int num_stats; + /* "1" for link_down_events special counter */ - return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ? - NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1; + num_stats = 1; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? + NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + + return num_stats; } static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { + struct mlx5_core_dev *mdev = priv->mdev; int i; strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); - if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) return idx; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_err_lanes_stats_desc[i].format); + return idx; } static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { + struct mlx5_core_dev *mdev = priv->mdev; int i; /* link_down_events_phy has special handling since it is not stored in __be64 format */ data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); - if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) return idx; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, + i); return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6de21d9f4fad..608025ca5c04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr { u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; + struct mlx5_fc *counter; }; #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) @@ -73,6 +74,7 @@ enum { MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), }; #define MLX5E_TC_MAX_SPLITS 1 @@ -81,7 +83,7 @@ struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u8 flags; + u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; struct list_head encap; /* flows sharing the same encap ID */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ @@ -100,11 +102,6 @@ struct mlx5e_tc_flow_parse_attr { int mirred_ifindex; }; -enum { - MLX5_HEADER_TYPE_VXLAN = 0x0, - MLX5_HEADER_TYPE_NVGRE = 0x1, -}; - #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) @@ -677,7 +674,7 @@ static void 
mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, } } -static struct mlx5_flow_handle * +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, @@ -688,19 +685,17 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .has_flow_tag = true, .flow_tag = attr->flow_tag, - .encap_id = 0, + .reformat_id = 0, + .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_handle *rule; bool table_created = false; int err, dest_ix = 0; if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { - rule = ERR_PTR(err); goto err_add_hairpin_flow; } if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { @@ -720,22 +715,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) { - rule = ERR_CAST(counter); + err = PTR_ERR(counter); goto err_fc_create; } dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter = counter; + dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; + attr->counter = counter; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_create_mod_hdr_id; - } } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { @@ -761,7 +755,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - rule = ERR_CAST(priv->fs.tc.t); + err = PTR_ERR(priv->fs.tc.t); goto err_create_ft; } @@ -771,13 +765,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->match_level != MLX5_MATCH_NONE) parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, dest, dest_ix); + flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, dest, dest_ix); - if (IS_ERR(rule)) + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); goto err_add_rule; + } - return rule; + return 0; err_add_rule: if (table_created) { @@ -793,7 +789,7 @@ err_fc_create: if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) mlx5e_hairpin_flow_del(priv, flow); err_add_hairpin_flow: - return rule; + return err; } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@ -802,7 +798,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_fc *counter = NULL; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = attr->counter; mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); @@ -829,28 +825,115 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack); static struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_flow_handle *rule; + + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(rule)) + return rule; + + if (attr->mirror_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); + if (IS_ERR(flow->rule[1])) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return 
flow->rule[1]; + } + } + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + return rule; +} + +static void +mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + + if (attr->mirror_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); +} + +static struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *slow_attr) +{ + struct mlx5_flow_handle *rule; + + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + slow_attr->mirror_count = 0; + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + if (!IS_ERR(rule)) + flow->flags |= MLX5E_TC_FLOW_SLOW; + + return rule; +} + +static void +mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *slow_attr) +{ + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + flow->flags &= ~MLX5E_TC_FLOW_SLOW; +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; - struct mlx5_flow_handle *rule = NULL; + struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - int err; + int err = 0, encap_err = 0; + + /* if prios are not supported, keep the old behaviour of using the same prio + * for all offloaded rules.
+ */ + if (!mlx5_eswitch_prios_supported(esw)) + attr->prio = 1; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->prio > max_prio) { + NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { out_dev = __dev_get_by_index(dev_net(priv->netdev), attr->parse_attr->mirred_ifindex); - err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, &encap_dev, flow, extack); - if (err) { - rule = ERR_PTR(err); - if (err != -EAGAIN) - goto err_attach_encap; + encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow, + extack); + if (encap_err && encap_err != -EAGAIN) { + err = encap_err; + goto err_attach_encap; } out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -859,49 +942,58 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_add_vlan; - } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_mod_hdr; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_create_counter; } + + attr->counter = counter; } - /* we get here if (1) there's no error (rule being null) or when + /* we get here if (1) there's no error or when * (2) there's an encap action and we're on -EAGAIN (no valid neigh) */ - if (rule != ERR_PTR(-EAGAIN)) { - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - - if (attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(flow->rule[1])) - goto err_fwd_rule; - } + if (encap_err == -EAGAIN) { + /* continue with goto slow path rule instead */ + struct mlx5_esw_flow_attr slow_attr; + + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr); + } else { + flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); } - return rule; -err_fwd_rule: - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - rule = flow->rule[1]; + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_add_rule; + } + + return 0; + err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) mlx5e_detach_encap(priv, flow); err_attach_encap: - return rule; +err_max_prio_chain: + return err; } static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, @@ -909,36 +1001,43 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_esw_flow_attr slow_attr; if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - 
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (flow->flags & MLX5E_TC_FLOW_SLOW) + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { mlx5e_detach_encap(priv, flow); kvfree(attr->parse_attr); } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(esw->dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - e->encap_size, e->encap_header, - &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) { mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", err); @@ -950,26 +1049,20 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, &e->flows, encap) { esw_attr = flow->esw_attr; esw_attr->encap_id = e->encap_id; - flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); + spec = &esw_attr->parse_attr->spec; + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); continue; } - if (esw_attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr); - err = PTR_ERR(flow->rule[1]); - mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n", - err); - continue; - } - } - - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ + flow->rule[0] = rule; } } @@ -977,25 +1070,44 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; + int err; list_for_each_entry(flow, &e->flows, encap) { - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + spec = &flow->esw_attr->parse_attr->spec; + + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); + flow->flags |= 
MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ + flow->rule[0] = rule; } if (e->flags & MLX5_ENCAP_ENTRY_VALID) { e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); } } +static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + return flow->esw_attr->counter; + else + return flow->nic_attr->counter; +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; @@ -1021,7 +1133,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) continue; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { neigh_used = true; @@ -1061,7 +1173,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); @@ -2391,7 +2503,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; fl4.fl4_dport = tun_key->tp_dst; break; @@ -2435,7 +2547,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, tos, ttl, fl4.daddr, @@ -2455,8 +2567,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2500,7 +2614,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl6.flowi6_proto = IPPROTO_UDP; fl6.fl6_dport = tun_key->tp_dst; break; @@ -2544,7 +2658,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, tos, ttl, &fl6.daddr, @@ -2565,8 +2679,10 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2617,7 +2733,7 @@ vxlan_encap_offload_err: if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_HEADER_TYPE_VXLAN; + tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; } else { 
NL_SET_ERR_MSG_MOD(extack, "port isn't an offloaded vxlan udp dport"); @@ -2728,6 +2844,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; @@ -2797,7 +2914,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, parse_attr->mirred_ifindex = out_dev->ifindex; parse_attr->tun_info = *info; attr->parse_attr = parse_attr; - action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; /* attr->out_rep is resolved when we handle encap */ @@ -2836,6 +2953,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_gact_goto_chain(a)) { + u32 dest_chain = tcf_gact_goto_chain_index(a); + u32 max_chain = mlx5_eswitch_get_chain_range(esw); + + if (dest_chain <= attr->chain) { + NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); + return -EOPNOTSUPP; + } + if (dest_chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = dest_chain; + + continue; + } + return -EINVAL; } @@ -2853,9 +2989,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static void get_flags(int flags, u8 *flow_flags) +static void get_flags(int flags, u16 *flow_flags) { - u8 __flow_flags = 0; + u16 __flow_flags = 0; if (flags & MLX5E_TC_INGRESS) __flow_flags |= MLX5E_TC_FLOW_INGRESS; @@ -2884,34 +3020,15 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) return &priv->fs.tc.ht; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) +static int +mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, + struct tc_cls_flower_offload *f, u16 flow_flags, + struct mlx5e_tc_flow_parse_attr **__parse_attr, + struct mlx5e_tc_flow **__flow) { - struct netlink_ext_ack *extack = f->common.extack; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; - int attr_size, err = 0; - u8 flow_flags = 0; - - get_flags(flags, &flow_flags); - - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (flow) { - NL_SET_ERR_MSG_MOD(extack, - "flow cookie already exists, ignoring"); - netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); - return 0; - } - - if (esw && esw->mode == SRIOV_OFFLOADS) { - flow_flags |= MLX5E_TC_FLOW_ESWITCH; - attr_size = sizeof(struct mlx5_esw_flow_attr); - } else { - flow_flags |= MLX5E_TC_FLOW_NIC; - attr_size = sizeof(struct mlx5_nic_flow_attr); - } + int err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@ -2925,49 +3042,161 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, flow->priv = priv; err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err < 0) + if (err) goto err_free; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, - extack); - if (err < 0) - goto err_free; - 
flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, - extack); - } else { - err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, - extack); - if (err < 0) - goto err_free; - flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, - extack); - } + *__flow = flow; + *__parse_attr = parse_attr; - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - if (err != -EAGAIN) - goto err_free; - } + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; - if (err != -EAGAIN) - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flags |= MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + flow->esw_attr->chain = f->common.chain_index; + flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + if (!(flow->esw_attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) kvfree(parse_attr); - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); - if (err) { - mlx5e_tc_del_flow(priv, flow); - kfree(flow); - } + *__flow = flow; + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); +out: return err; +} + +static int +mlx5e_add_nic_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + /* multi-chain not supported for NIC rules */ + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + + flow_flags |= MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + kvfree(parse_attr); + *__flow = flow; + + return 0; err_free: + kfree(flow); kvfree(parse_attr); +out: + return err; +} + +static int +mlx5e_tc_add_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + int flags, + struct mlx5e_tc_flow **flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u16 flow_flags; + int err; + + get_flags(flags, &flow_flags); + + if (!tc_can_offload_extack(priv->netdev, f->common.extack)) + return -EOPNOTSUPP; + + if (esw && esw->mode == SRIOV_OFFLOADS) + err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + else + err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + + return err; +} + +int mlx5e_configure_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + int err = 0; 
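+ /* the flower cookie is the rhashtable key here; a request for a cookie that is already offloaded is ignored below rather than replacing the existing flow */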
+ + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + NL_SET_ERR_MSG_MOD(extack, + "flow cookie already exists, ignoring"); + netdev_warn_once(priv->netdev, + "flow cookie %lx already exists, ignoring\n", + f->cookie); + goto out; + } + + err = mlx5e_tc_add_flow(priv, f, flags, &flow); + if (err) + goto out; + + err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) + goto err_free; + + return 0; + +err_free: + mlx5e_tc_del_flow(priv, flow); kfree(flow); +out: return err; } @@ -3018,7 +3247,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) return 0; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index ae73ea992845..6dacaeba2fbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -290,10 +290,9 @@ dma_unmap_wqe_err: static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, - u16 pi, u16 frag_pi) + u16 pi, u16 nnops) { struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; edge_wi = wi + nnops; @@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; + u16 headlen, ihs, contig_wqebbs_room; u16 ds_cnt, ds_cnt_inl = 0; - u16 headlen, ihs, frag_pi; u8 num_wqebbs, opcode; u32 num_bytes; int num_dma; @@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { - mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); } @@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe_info *wi; struct mlx5e_sq_stats *stats = sq->stats; - u16 headlen, ihs, pi, frag_pi; + u16 headlen, ihs, pi, contig_wqebbs_room; u16 ds_cnt, ds_cnt_inl = 0; u8 num_wqebbs, opcode; u32 num_bytes; @@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, } num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); - if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); } - mlx5i_sq_fetch_wqe(sq, &wqe, &pi); + mlx5i_sq_fetch_wqe(sq, &wqe, pi); /* fill wqe */ wi = &sq->db.wqe_info[pi]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 48864f4988a4..c1e1a16a9b07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -273,7 +273,7 @@ static void eq_pf_process(struct mlx5_eq *eq) case MLX5_PFAULT_SUBTYPE_WQE: /* WQE based event */ 
pfault->type = - be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; pfault->token = be32_to_cpu(pf_eqe->wqe.token); pfault->wqe.wq_num = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ea7dedc2d5ad..d004957328f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create FDB log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + root_ns = mlx5_get_fdb_sub_ns(dev, 0); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); return -EOPNOTSUPP; @@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index dfc642de4e6d..aaafc9f17115 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -59,6 +59,10 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) +#define FDB_MAX_CHAIN 3 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 16 + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -120,6 +124,13 @@ struct mlx5_vport { u16 enabled_events; }; +enum offloads_fdb_flags { + ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), +}; + +extern const unsigned int ESW_POOLS[4]; + +#define PRIO_LEVELS 2 struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -130,16 +141,24 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { - struct mlx5_flow_table *fast_fdb; - struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; + + struct { + struct mlx5_flow_table *fdb; + u32 num_rules; + } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS]; + /* Protects fdb_prio table */ + struct mutex fdb_prio_lock; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; } offloads; }; + u32 flags; }; struct mlx5_esw_offload { @@ -181,6 +200,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; + int nvports; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -228,6 +248,19 @@ void 
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); + +u16 +mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); + +u32 +mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, @@ -266,6 +299,10 @@ struct mlx5_esw_flow_attr { u32 encap_id; u32 mod_hdr_id; u8 match_level; + struct mlx5_fc *counter; + u32 chain; + u16 prio; + u32 dest_chain; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -318,6 +355,11 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} + +#define FDB_MAX_CHAIN 1 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 1 + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a35a2310f871..9eac137790f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,33 +37,59 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "en.h" +#include "fs_core.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +#define fdb_prio_table(esw, chain, prio, level) \ + (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); + +bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) +{ + return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); +} + +u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_CHAIN; + + return 0; +} + +u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_PRIO; + + return 1; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_table *ft = NULL; - struct mlx5_fc *counter = NULL; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + bool mirror = !!(attr->mirror_count); struct mlx5_flow_handle *rule; + struct mlx5_flow_table *fdb; int j, i = 0; void *misc; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - if (attr->mirror_count) - ft = esw->fdb_table.offloads.fwd_fdb; - else - ft = esw->fdb_table.offloads.fast_fdb; - flow_act.action = attr->action; /* if per flow vlan pop/push is emulated, don't set that into the firmware */ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) @@ -81,23 +107,33 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - for (j = attr->mirror_count; j < attr->out_count; j++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + if (attr->dest_chain) { + struct mlx5_flow_table *ft; + + ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_create_goto_table; + } + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; i++; + } else { + for (j = attr->mirror_count; j < attr->out_count; j++) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); + dest[i].vport.vhca_id_valid = + !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + i++; + } } } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) { - rule = ERR_CAST(counter); - goto err_counter_alloc; - } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[i].counter = counter; + dest[i].counter_id = mlx5_fc_id(attr->counter); i++; } @@ -127,10 +163,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - flow_act.encap_id = attr->encap_id; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + flow_act.reformat_id = attr->encap_id; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (IS_ERR(fdb)) { + rule = ERR_CAST(fdb); + goto err_esw_get; + } + + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -139,8 +181,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); -err_counter_alloc: + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); +err_esw_get: + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); +err_create_goto_table: return rule; } @@ -150,11 +195,25 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; void *misc; int i; + fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + if (IS_ERR(fast_fdb)) { + rule = ERR_CAST(fast_fdb); + goto err_get_fast; + } + + fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + if (IS_ERR(fwd_fdb)) { + rule = ERR_CAST(fwd_fdb); + goto err_get_fwd; + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < attr->mirror_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -164,7 +223,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = esw->fdb_table.offloads.fwd_fdb, + dest[i].ft = fwd_fdb, i++; misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -187,12 +246,41 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch 
*esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; - rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i); + rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); - if (!IS_ERR(rule)) - esw->offloads.num_flows++; + if (IS_ERR(rule)) + goto add_err; + + esw->offloads.num_flows++; return rule; +add_err: + esw_put_prio_table(esw, attr->chain, attr->prio, 1); +err_get_fwd: + esw_put_prio_table(esw, attr->chain, attr->prio, 0); +err_get_fast: + return rule; +} + +static void +__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr, + bool fwd_rule) +{ + bool mirror = (attr->mirror_count > 0); + + mlx5_del_flow_rules(rule); + esw->offloads.num_flows--; + + if (fwd_rule) { + esw_put_prio_table(esw, attr->chain, attr->prio, 1); + esw_put_prio_table(esw, attr->chain, attr->prio, 0); + } else { + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); + } } void @@ -200,12 +288,15 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr) { - struct mlx5_fc *counter = NULL; + __mlx5_eswitch_del_rule(esw, rule, attr, false); +} - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, true); } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -294,7 +385,8 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + !attr->dest_chain); err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) @@ -501,74 +593,170 @@ out: #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +/* Firmware currently has 4 pools of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE); this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via put/get_sz_to_pool. + * Firmware doesn't report any of this for now.
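 * [Editor's note, not part of the patch: with ESW_SIZE = 16M and the pool
 * sizes below of {4M, 1M, 64K, 4K}, the per-pool table budgets computed
 * later as ESW_SIZE / ESW_POOLS[i] come out to 4, 16, 256 and 4096 tables
 * respectively; a pool whose table size exceeds the firmware's
 * log_max_ft_size limit gets a budget of 0.]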
+ * ESW_POOLS is expected to be sorted from large to small */ +#define ESW_SIZE (16 * 1024 * 1024) +const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, + 64 * 1024, 4 * 1024 }; + +static int +get_sz_from_pool(struct mlx5_eswitch *esw) +{ + int sz = 0, i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (esw->fdb_table.offloads.fdb_left[i]) { + --esw->fdb_table.offloads.fdb_left[i]; + sz = ESW_POOLS[i]; + break; + } + } + + return sz; +} + +static void +put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (sz >= ESW_POOLS[i]) { + ++esw->fdb_table.offloads.fdb_left[i]; + break; + } + } +} + +static struct mlx5_flow_table * +create_next_size_table(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *ns, + u16 table_prio, + int level, + u32 flags) +{ + struct mlx5_flow_table *fdb; + int sz; + + sz = get_sz_from_pool(esw); + if (!sz) + return ERR_PTR(-ENOSPC); + + fdb = mlx5_create_auto_grouped_flow_table(ns, + table_prio, + sz, + ESW_OFFLOADS_NUM_GROUPS, + level, + flags); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), table_prio, level, sz); + put_sz_to_pool(esw, sz); + } + + return fdb; +} + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) { struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int esw_size, err = 0; + struct mlx5_flow_namespace *ns; + int table_prio, l = 0; u32 flags = 0; - u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - err = -EOPNOTSUPP; - goto out_namespace; - } + if (chain == FDB_SLOW_PATH_CHAIN) + return esw->fdb_table.offloads.slow_fdb; - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, - 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = fdb_prio_table(esw, chain, prio, level).fdb; + if (fdb) { + /* take ref on earlier levels as well */ + while (level >= 0) + fdb_prio_table(esw, chain, prio, level--).num_rules++; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; + } - if (mlx5_esw_has_fwd_fdb(dev)) - esw_size >>= 1; + ns = mlx5_get_fdb_sub_ns(dev, chain); + if (!ns) { + esw_warn(dev, "Failed to get FDB sub namespace\n"); + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return ERR_PTR(-EOPNOTSUPP); + } if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 0, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto out_namespace; - } - esw->fdb_table.offloads.fast_fdb = fdb; + table_prio = (chain * FDB_MAX_PRIO) + prio - 1; - if (!mlx5_esw_has_fwd_fdb(dev)) - goto out_namespace; + /* create earlier levels for correct fs_core lookup when + *
connecting tables + */ + for (l = 0; l <= level; l++) { + if (fdb_prio_table(esw, chain, prio, l).fdb) { + fdb_prio_table(esw, chain, prio, l).num_rules++; + continue; + } - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 1, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create fwd table err %d\n", err); - goto out_ft; + fdb = create_next_size_table(esw, ns, table_prio, l, flags); + if (IS_ERR(fdb)) { + l--; + goto err_create_fdb; + } + + fdb_prio_table(esw, chain, prio, l).fdb = fdb; + fdb_prio_table(esw, chain, prio, l).num_rules = 1; } - esw->fdb_table.offloads.fwd_fdb = fdb; - return err; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; -out_ft: - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); -out_namespace: - return err; +err_create_fdb: + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + if (l >= 0) + esw_put_prio_table(esw, chain, prio, l); + + return fdb; +} + +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) +{ + int l; + + if (chain == FDB_SLOW_PATH_CHAIN) + return; + + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); + + for (l = level; l >= 0; l--) { + if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) + continue; + + put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); + mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); + fdb_prio_table(esw, chain, prio, l).fdb = NULL; + } + + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); } -static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) { - if (mlx5_esw_has_fwd_fdb(esw->dev)) - mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb); - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); + /* If lazy creation isn't supported, deref the fast path tables */ + if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { + esw_put_prio_table(esw, 0, 1, 1); + esw_put_prio_table(esw, 0, 1, 0); + } } #define MAX_PF_SQ 256 @@ -579,12 +767,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0; + int table_size, ix, err = 0, i; struct mlx5_flow_group *g; + u32 flags = 0, fdb_max; void *match_criteria; - u32 *flow_group_in; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); @@ -599,12 +788,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - err = esw_create_offloads_fast_fdb_table(esw); - if (err) - goto fast_fdb_err; + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, + fdb_max); + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) + esw->fdb_table.offloads.fdb_left[i] = + ESW_POOLS[i] <= fdb_max ? 
ESW_SIZE / ESW_POOLS[i] : 0; table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + /* create the slow path fdb with encap set, so further table instances + * can be created at run time while VFs are probed if the FW allows that. + */ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + ft_attr.flags = flags; ft_attr.max_fte = table_size; ft_attr.prio = FDB_SLOW_PATH; @@ -616,6 +822,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; + /* If lazy creation isn't supported, open the fast path tables now */ + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); + esw_get_prio_table(esw, 0, 1, 0); + esw_get_prio_table(esw, 0, 1, 1); + } else { + esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + } + /* create send-to-vport group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -663,6 +881,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto miss_rule_err; + esw->nvports = nvports; kvfree(flow_group_in); return 0; @@ -671,10 +890,9 @@ miss_rule_err: miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: + esw_destroy_offloads_fast_fdb_tables(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: - esw_destroy_offloads_fast_fdb_table(esw); -fast_fdb_err: ns_err: kvfree(flow_group_in); return err; @@ -682,7 +900,7 @@ ns_err: static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { - if (!esw->fdb_table.offloads.fast_fdb) + if (!esw->fdb_table.offloads.slow_fdb) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); @@ -692,7 +910,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fast_fdb_tables(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@ -949,6 +1167,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) return err; @@ -1256,7 +1476,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && - (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) return -EOPNOTSUPP; @@ -1277,16 +1497,19 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return -EOPNOTSUPP; } - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; - err = esw_create_offloads_fast_fdb_table(esw); + + err = esw_create_offloads_fdb_tables(esw, esw->nvports); + if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed re-creating fast FDB table"); esw->offloads.encap = !encap; - (void)esw_create_offloads_fast_fdb_table(esw); + 
(void)esw_create_offloads_fdb_tables(esw, esw->nvports); } + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 5645a4facad2..515e3d6de051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -245,7 +245,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, return ERR_PTR(res); } - /* Context will be freed by wait func after completion */ + /* Context should be freed by the caller after completion. */ return context; } @@ -418,10 +418,8 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); cmd.flags = htonl(flags); context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); - if (IS_ERR(context)) { - err = PTR_ERR(context); - goto out; - } + if (IS_ERR(context)) + return PTR_ERR(context); err = mlx5_fpga_ipsec_cmd_wait(context); if (err) @@ -435,6 +433,7 @@ static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) } out: + kfree(context); return err; } @@ -650,7 +649,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - flow_act->has_flow_tag) + (flow_act->flags & FLOW_ACT_HAS_TAG)) return false; return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 8e01f818021b..08a891f9aade 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *next_ft, unsigned int *table_id, u32 flags) { - int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); + int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, } MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, - en_encap_decap); - MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en, - en_encap_decap); + en_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + en_encap); switch (op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -343,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -417,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, continue; MLX5_SET(flow_counter_list, in_dests, flow_counter_id, - dst->dest_attr.counter->id); + dst->dest_attr.counter_id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } @@ -594,62 +596,78 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *bytes = MLX5_GET64(traffic_counter, stats, octets); } -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t 
size, - void *encap_header, - u32 *encap_id) +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id) { - int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); - u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; - void *encap_header_in; - void *header; + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + void *packet_reformat_context_in; + int max_encap_size; + void *reformat; int inlen; int err; u32 *in; + if (namespace == MLX5_FLOW_NAMESPACE_FDB) + max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + else + max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); + if (size > max_encap_size) { mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", size, max_encap_size); return -EINVAL; } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; - encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); - header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); - inlen = header - (void *)in + size; + packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in, + in, packet_reformat_context); + reformat = MLX5_ADDR_OF(packet_reformat_context_in, + packet_reformat_context_in, + reformat_data); + inlen = reformat - (void *)in + size; memset(in, 0, inlen); - MLX5_SET(alloc_encap_header_in, in, opcode, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER); - MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); - MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); - memcpy(header, encap_header, size); + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_data_size, size); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_type, reformat_type); + memcpy(reformat, reformat_data, size); memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id) { - u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; memset(in, 0, sizeof(in)); - MLX5_SET(dealloc_encap_header_in, in, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); - MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + packet_reformat_id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, u8 namespace, u8 num_actions, @@ -667,9 +685,14 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, table_type = FS_FT_FDB; break; case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_BYPASS: 
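/* [Editor's note, not part of the patch] The BYPASS case added above
 * intentionally falls through to the NIC RX branch, while the new EGRESS
 * case below maps to NIC TX: each namespace resolves to the flow-table
 * type whose max_modify_header_actions capability bounds num_actions.
 */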
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); table_type = FS_FT_NIC_RX; break; + case MLX5_FLOW_NAMESPACE_EGRESS: + max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_TX; + break; default: return -EOPNOTSUPP; } @@ -702,6 +725,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, kfree(in); return err; } +EXPORT_SYMBOL(mlx5_modify_header_alloc); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) { @@ -716,6 +740,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -760,8 +785,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_FDB: case FS_FT_SNIFFER_RX: case FS_FT_SNIFFER_TX: - return mlx5_fs_cmd_get_fw_cmds(); case FS_FT_NIC_TX: + return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 37d114c668b7..67ba4c975d81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,6 +40,7 @@ #include "diag/fs_tracepoint.h" #include "accel/ipsec.h" #include "fpga/ipsec.h" +#include "eswitch.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -76,6 +77,14 @@ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) +#define FS_CHAINING_CAPS_EGRESS \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit.flow_table_modify)) + #define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 @@ -151,6 +160,17 @@ static struct init_tree_node { } }; +static struct init_tree_node egress_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 1, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + enum fs_i_lock_class { FS_LOCK_GRANDPARENT, FS_LOCK_PARENT, @@ -694,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root) + if (!root || root->type == FS_TYPE_PRIO_CHAINS) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -1388,7 +1408,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_DECAP | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | @@ -1408,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if (flow_act->has_flow_tag && + if ((flow_act->flags & FLOW_ACT_HAS_TAG) && fte->action.flow_tag != flow_act->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u 
already exists with different flow tag %u\n", @@ -1455,29 +1475,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, return handle; } -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) +static bool counter_is_valid(u32 action) { - struct mlx5_flow_rule *dst; - struct fs_fte *fte; - - fs_get_obj(fte, handle->rule[0]->node.parent); - - fs_for_each_dst(dst, fte) { - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) - return dst->dest_attr.counter; - } - - return NULL; -} - -static bool counter_is_valid(struct mlx5_fc *counter, u32 action) -{ - if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) - return !counter; - - if (!counter) - return false; - return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); } @@ -1487,7 +1486,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, struct mlx5_flow_table *ft) { if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) - return counter_is_valid(dest->counter, action); + return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; @@ -1629,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, search_again_locked: version = matched_fgs_get_version(match_head); + if (flow_act->flags & FLOW_ACT_NO_APPEND) + goto skip_search; /* Try to find a fg that already contains a matching fte */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1645,6 +1646,11 @@ search_again_locked: return rule; } +skip_search: + /* No group with matching fte found, or we skipped the search. + * Try to add a new fte to any matching fg. + */ + /* Check the ft version, for case that new flow group * was added while the fgs weren't locked */ @@ -1975,12 +1981,24 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) fg->id); } +struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, + int n) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || !steering->fdb_sub_ns) + return NULL; + + return steering->fdb_sub_ns[n]; +} +EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); + struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { struct mlx5_flow_steering *steering = dev->priv.steering; struct mlx5_flow_root_namespace *root_ns; - int prio; + int prio = 0; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; @@ -1988,40 +2006,29 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { - case MLX5_FLOW_NAMESPACE_BYPASS: - case MLX5_FLOW_NAMESPACE_LAG: - case MLX5_FLOW_NAMESPACE_OFFLOADS: - case MLX5_FLOW_NAMESPACE_ETHTOOL: - case MLX5_FLOW_NAMESPACE_KERNEL: - case MLX5_FLOW_NAMESPACE_LEFTOVERS: - case MLX5_FLOW_NAMESPACE_ANCHOR: - prio = type; - break; case MLX5_FLOW_NAMESPACE_FDB: if (steering->fdb_root_ns) return &steering->fdb_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_RX: if (steering->sniffer_rx_root_ns) return &steering->sniffer_rx_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_TX: if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; - else - return NULL; - case MLX5_FLOW_NAMESPACE_EGRESS: - if (steering->egress_root_ns) - return &steering->egress_root_ns->ns; - else - return NULL; - default: return NULL; + default: + break; + } + + if (type == MLX5_FLOW_NAMESPACE_EGRESS) { + root_ns = steering->egress_root_ns; + } else { /* Must be NIC RX */ + root_ns = 
steering->root_ns; + prio = type; } - root_ns = steering->root_ns; if (!root_ns) return NULL; @@ -2064,8 +2071,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d } } -static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned int prio, int num_levels) +static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels, + enum fs_node_type type) { struct fs_prio *fs_prio; @@ -2073,7 +2082,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, if (!fs_prio) return ERR_PTR(-ENOMEM); - fs_prio->node.type = FS_TYPE_PRIO; + fs_prio->node.type = type; tree_init_node(&fs_prio->node, NULL, del_sw_prio); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; @@ -2083,6 +2092,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return fs_prio; } +static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS); +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO); +} + static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace *ns) { @@ -2387,6 +2409,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_egress_acls_root_ns(dev); cleanup_ingress_acls_root_ns(dev); cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); @@ -2432,27 +2457,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + struct mlx5_flow_namespace *ns; + struct fs_prio *maj_prio; + struct fs_prio *min_prio; + int levels; + int chain; + int prio; + int err; steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); if (!steering->fdb_root_ns) return -ENOMEM; - prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2); - if (IS_ERR(prio)) + steering->fdb_sub_ns = kzalloc(sizeof(*steering->fdb_sub_ns) * + (FDB_MAX_CHAIN + 1), GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0, + levels); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } + + for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { + ns = fs_create_namespace(maj_prio); + if (IS_ERR(ns)) { + err = PTR_ERR(ns); + goto out_err; + } + + for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { + min_prio = fs_create_prio(ns, prio, 2); + if (IS_ERR(min_prio)) { + err = PTR_ERR(min_prio); + goto out_err; + } + } + + steering->fdb_sub_ns[chain] = ns; + } - prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); - if (IS_ERR(prio)) + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } set_prio_attrs(steering->fdb_root_ns); return 0; out_err: cleanup_root_ns(steering->fdb_root_ns); + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; steering->fdb_root_ns = NULL; - return PTR_ERR(prio); + return err; } static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) @@ -2537,16
+2599,23 @@ cleanup_root_ns: static int init_egress_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + int err; steering->egress_root_ns = create_root_ns(steering, FS_FT_NIC_TX); if (!steering->egress_root_ns) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + err = init_root_tree(steering, &egress_root_fs, + &steering->egress_root_ns->ns.node); + if (err) + goto cleanup; + set_prio_attrs(steering->egress_root_ns); + return 0; +cleanup: + cleanup_root_ns(steering->egress_root_ns); + steering->egress_root_ns = NULL; + return err; } int mlx5_init_fs(struct mlx5_core_dev *dev) @@ -2614,7 +2683,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (MLX5_IPSEC_DEV(dev)) { + if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a06f83c0c2b6..b51ad217da32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -38,9 +38,21 @@ #include <linux/rhashtable.h> #include <linux/llist.h> +/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, + * and those are in parallel to one another when going over them to connect + * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one + * parallel namespace will not automatically connect to the first flow table + * found in any prio in any next namespace, but skip the entire containing + * TYPE_PRIO_CHAINS prio. + * + * This is used to implement tc chains, each chain of prios is a different + * namespace inside a containing TYPE_PRIO_CHAINS prio. + */ + enum fs_node_type { FS_TYPE_NAMESPACE, FS_TYPE_PRIO, + FS_TYPE_PRIO_CHAINS, FS_TYPE_FLOW_TABLE, FS_TYPE_FLOW_GROUP, FS_TYPE_FLOW_ENTRY, @@ -73,6 +85,7 @@ struct mlx5_flow_steering { struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_namespace **fdb_sub_ns; struct mlx5_flow_root_namespace **esw_egress_root_ns; struct mlx5_flow_root_namespace **esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 09206c4acd9a..32accd6b041b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -99,6 +99,18 @@ static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, list_add_tail(&counter->list, next); } +static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + list_del(&counter->list); + + spin_lock(&fc_stats->counters_idr_lock); + WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); + spin_unlock(&fc_stats->counters_idr_lock); +} + /* The function returns the last counter that was queried so the caller * function can continue calling it till all counters are queried. 
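 * [Editor's note, not part of the patch: in the mlx5_fc_stats_work() hunk
 * just below, dellist is now drained before addlist. Since a counter is
 * always pushed to addlist at create time before it can ever be pushed to
 * dellist at destroy time, snapshotting dellist first guarantees that any
 * removal processed in a given run had its insertion processed in the same
 * run or an earlier one, which is what the new comment in that hunk
 * asserts.]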
*/ @@ -179,20 +191,23 @@ static void mlx5_fc_stats_work(struct work_struct *work) struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, priv.fc_stats.work.work); struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - struct llist_node *tmplist = llist_del_all(&fc_stats->addlist); + /* Take dellist first to ensure that counters cannot be deleted before + * they are inserted. + */ + struct llist_node *dellist = llist_del_all(&fc_stats->dellist); + struct llist_node *addlist = llist_del_all(&fc_stats->addlist); struct mlx5_fc *counter = NULL, *last = NULL, *tmp; unsigned long now = jiffies; - if (tmplist || !list_empty(&fc_stats->counters)) + if (addlist || !list_empty(&fc_stats->counters)) queue_delayed_work(fc_stats->wq, &fc_stats->work, fc_stats->sampling_interval); - llist_for_each_entry(counter, tmplist, addlist) + llist_for_each_entry(counter, addlist, addlist) mlx5_fc_stats_insert(dev, counter); - tmplist = llist_del_all(&fc_stats->dellist); - llist_for_each_entry_safe(counter, tmp, tmplist, dellist) { - list_del(&counter->list); + llist_for_each_entry_safe(counter, tmp, dellist, dellist) { + mlx5_fc_stats_remove(dev, counter); mlx5_free_fc(dev, counter); } @@ -258,6 +273,12 @@ err_out: } EXPORT_SYMBOL(mlx5_fc_create); +u32 mlx5_fc_id(struct mlx5_fc *counter) +{ + return counter->id; +} +EXPORT_SYMBOL(mlx5_fc_id); + void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; @@ -266,10 +287,6 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) return; if (counter->aging) { - spin_lock(&fc_stats->counters_idr_lock); - WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); - spin_unlock(&fc_stats->counters_idr_lock); - llist_add(&counter->dellist, &fc_stats->dellist); mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 5ef3ef0072b4..9165ca567047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -110,12 +110,11 @@ struct mlx5i_tx_wqe { static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, struct mlx5i_tx_wqe **wqe, - u16 *pi) + u16 pi) { struct mlx5_wq_cyc *wq = &sq->wq; - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); memset(*wqe, 0, sizeof(**wqe)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cc298527baf1..0594d0961cb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -39,6 +39,7 @@ #include <linux/if_link.h> #include <linux/firmware.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" @@ -171,17 +172,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); - -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 
modify_header_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 4ca07bfb6b14..91b8139a388d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, } qp->qpn = MLX5_GET(create_dct_out, out, dctn); + qp->uid = MLX5_GET(create_dct_in, in, uid); err = create_resource_common(dev, qp, MLX5_RES_DCT); if (err) goto err_cmd; @@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, err_cmd: MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, din, uid, qp->uid); mlx5_cmd_exec(dev, (void *)&in, sizeof(din), (void *)&out, sizeof(dout)); return err; @@ -240,6 +242,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, if (err) return err; + qp->uid = MLX5_GET(create_qp_in, in, uid); qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); @@ -261,6 +264,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, uid, qp->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -275,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + MLX5_SET(drain_dct_in, in, uid, qp->uid); return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); } @@ -301,6 +306,7 @@ destroy: destroy_resource_common(dev, &dct->mqp); MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); return err; @@ -320,6 +326,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -373,7 +380,7 @@ static void mbox_free(struct mbox_info *mbox) static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, u32 opt_param_mask, void *qpc, - struct mbox_info *mbox) + struct mbox_info *mbox, u16 uid) { mbox->out = NULL; mbox->in = NULL; @@ -381,26 +388,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, #define MBOX_ALLOC(mbox, typ) \ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) -#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ - MLX5_SET(typ##_in, in, opcode, _opcode); \ - MLX5_SET(typ##_in, in, qpn, _qpn) - -#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ - MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ - MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ - memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \ + do { \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn); \ + MLX5_SET(typ##_in, in, uid, _uid); \ + } while (0) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \ + do { \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \ + MLX5_SET(typ##_in, in, 
opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \ + MLX5_ST_SZ_BYTES(qpc)); \ + } while (0) switch (opcode) { /* 2RST & 2ERR */ case MLX5_CMD_OP_2RST_QP: if (MBOX_ALLOC(mbox, qp_2rst)) return -ENOMEM; - MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid); break; case MLX5_CMD_OP_2ERR_QP: if (MBOX_ALLOC(mbox, qp_2err)) return -ENOMEM; - MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid); break; /* MODIFY with QPC */ @@ -408,37 +421,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, if (MBOX_ALLOC(mbox, rst2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2RTR_QP: if (MBOX_ALLOC(mbox, init2rtr_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTR2RTS_QP: if (MBOX_ALLOC(mbox, rtr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTS2RTS_QP: if (MBOX_ALLOC(mbox, rts2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_SQERR2RTS_QP: if (MBOX_ALLOC(mbox, sqerr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2INIT_QP: if (MBOX_ALLOC(mbox, init2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; default: mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", @@ -456,7 +469,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, int err; err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, - opt_param_mask, qpc, &mbox); + opt_param_mask, qpc, &mbox, qp->uid); if (err) return err; @@ -531,6 +544,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); +static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + MLX5_SET(destroy_rq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq) { @@ -541,6 +565,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + rq->uid = MLX5_GET(create_rq_in, in, uid); rq->qpn = rqn; err = create_resource_common(dev, rq, MLX5_RES_RQ); if (err) @@ -549,7 +574,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_rq: - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); return err; } @@ -559,10 +584,21 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); +static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid) +{ + u32 
in[MLX5_ST_SZ_DW(destroy_sq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + MLX5_SET(destroy_sq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq) { @@ -573,6 +609,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + sq->uid = MLX5_GET(create_sq_in, in, uid); sq->qpn = sqn; err = create_resource_common(dev, sq, MLX5_RES_SQ); if (err) @@ -581,7 +618,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_sq: - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); return err; } @@ -591,7 +628,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq) { destroy_resource_common(dev, sq); - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 7e20666ce5ae..6a6fc9be01e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (!create_in) return -ENOMEM; + MLX5_SET(create_srq_in, create_in, uid, in->uid); srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); @@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } return err; } @@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_srq_in, srq_in, opcode, MLX5_CMD_OP_DESTROY_SRQ); MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, if (!create_in) return -ENOMEM; + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); @@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, goto out; srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; out: kvfree(create_in); return err; @@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, 
sizeof(xrcsrq_out)); @@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); @@ -365,10 +374,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, wq = MLX5_ADDR_OF(rmpc, rmpc, wq); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + if (!err) + srq->uid = in->uid; kvfree(create_in); return err; @@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - return mlx5_core_destroy_rmp(dev, srq->srqn); + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int arm_rmp_cmd(struct mlx5_core_dev *dev, @@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); MLX5_SET(wq, wq, lwm, lwm); MLX5_SET(rmp_bitmask, bitmask, lwm, 1); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); @@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(xrqc, xrqc, user_index, in->user_index); MLX5_SET(xrqc, xrqc, cqn, in->cqn); MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } return err; } @@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 68e7f8df2a6d..2dcbf1ebfd6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq) return (u32)wq->fbc.sz_m1 + 1; } -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq) -{ - return wq->fbc.frag_sz_m1 + 1; -} - u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) { return wq->fbc.sz_m1 + 1; @@ -54,54 +49,37 @@ u32 mlx5_wq_ll_get_size(struct 
mlx5_wq_ll *wq) return (u32)wq->fbc.sz_m1 + 1; } -static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) -{ - return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride; -} - -static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) -{ - return mlx5_wq_cyc_get_byte_size(&wq->rq) + - mlx5_wq_cyc_get_byte_size(&wq->sq); -} - -static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) +static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) { - return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride; -} - -static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq) -{ - return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride; + return ((u32)1 << log_sz) << log_stride; } int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; int err; - mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), - MLX5_GET(wq, wqc, log_wq_sz), - fbc); - wq->sz = wq->fbc.sz_m1 + 1; - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - fbc->frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + wq->sz = mlx5_wq_cyc_get_size(wq); wq_ctrl->mdev = mdev; @@ -113,46 +91,19 @@ err_db_free: return err; } -static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf, - struct mlx5_wq_qp *qp) -{ - struct mlx5_frag_buf_ctrl *sq_fbc; - struct mlx5_frag_buf *rqb, *sqb; - - rqb = &qp->rq.fbc.frag_buf; - *rqb = *buf; - rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq); - rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE); - - sq_fbc = &qp->sq.fbc; - sqb = &sq_fbc->frag_buf; - *sqb = *buf; - sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq); - sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE); - sqb->frags += rqb->npages; /* first part is for the rq */ - if (sq_fbc->strides_offset) - sqb->frags--; -} - int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u16 sq_strides_offset; - u32 rq_pg_remainder; - int err; + u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size); + u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB); + u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size); - mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4, - MLX5_GET(qpc, qpc, log_rq_size), - &wq->rq.fbc); + u32 rq_byte_size; + int err; - rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE; - sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB; - mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB), - MLX5_GET(qpc, qpc, log_sq_size), - sq_strides_offset, - &wq->sq.fbc); err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -160,14 +111,32 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq), + err = mlx5_frag_buf_alloc_node(mdev, + wq_get_byte_sz(log_rq_sz, log_rq_stride) + + wq_get_byte_sz(log_sq_sz, 
log_sq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq); + mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc); + + rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride); + + if (rq_byte_size < PAGE_SIZE) { + /* SQ starts within the same page of the RQ */ + u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB; + + mlx5_init_fbc_offset(wq_ctrl->buf.frags, + log_sq_stride, log_sq_sz, sq_strides_offset, + &wq->sq.fbc); + } else { + u16 rq_npages = rq_byte_size >> PAGE_SHIFT; + + mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages, + log_sq_stride, log_sq_sz, &wq->sq.fbc); + } wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; @@ -186,17 +155,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; - mlx5_core_init_cq_frag_buf(&wq->fbc, cqc); - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { @@ -205,8 +176,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->fbc.frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc); wq_ctrl->mdev = mdev; @@ -222,30 +192,29 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; struct mlx5_wqe_srq_next_seg *next_seg; int err; int i; - mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), - MLX5_GET(wq, wqc, log_wq_sz), - fbc); - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - wq->fbc.frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); for (i = 0; i < fbc->sz_m1; i++) { next_seg = mlx5_wq_ll_get_wqe(wq, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 3a1a170bb2d7..b1293d153a58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, 
struct mlx5_wq_qp *wq, @@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) return ctr & wq->fbc.sz_m1; } -static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr & wq->fbc.frag_sz_m1; -} - static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) { return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); @@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix) return mlx5_frag_buf_get_wqe(&wq->fbc, ix); } +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) +{ + return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; +} + static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) { int equal = (cc1 == cc2); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 68fa44a41485..1f77e97e2d7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl_flex_keys.o \ spectrum1_mr_tcam.o spectrum2_mr_tcam.o \ spectrum_mr_tcam.o spectrum_mr.o \ - spectrum_qdisc.o spectrum_span.o + spectrum_qdisc.o spectrum_span.o \ + spectrum_nve.o spectrum_nve_vxlan.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 81533d7f395c..937d0ace699a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1055,6 +1055,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_driver_init: mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: if (!reload) devlink_unregister(devlink); @@ -1088,6 +1089,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); + mlxsw_hwmon_fini(mlxsw_core->hwmon); if (!reload) devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 655ddd204ab2..c35be477856f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -359,6 +359,10 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; } +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ +} + #endif struct mlxsw_thermal; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index f6cf2896d337..e04e8162aa14 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -303,8 +303,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, struct device *hwmon_dev; int err; - mlxsw_hwmon = devm_kzalloc(mlxsw_bus_info->dev, sizeof(*mlxsw_hwmon), - GFP_KERNEL); + mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL); if (!mlxsw_hwmon) return -ENOMEM; mlxsw_hwmon->core = mlxsw_core; @@ -321,10 +320,9 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; - hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, - 
"mlxsw", - mlxsw_hwmon, - mlxsw_hwmon->groups); + hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", mlxsw_hwmon, + mlxsw_hwmon->groups); if (IS_ERR(hwmon_dev)) { err = PTR_ERR(hwmon_dev); goto err_hwmon_register; @@ -337,5 +335,12 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, err_hwmon_register: err_fans_init: err_temp_init: + kfree(mlxsw_hwmon); return err; } + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + hwmon_device_unregister(mlxsw_hwmon->hwmon_dev); + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ed7e4c4e7403..8a4983adae94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2994,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_qdiscs_init; } + err = mlxsw_sp_port_nve_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n", + mlxsw_sp_port->local_port); + goto err_port_nve_init; + } + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", @@ -3022,6 +3029,8 @@ err_register_netdev: mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); err_port_vlan_get: + mlxsw_sp_port_nve_fini(mlxsw_sp_port); +err_port_nve_init: mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); err_port_qdiscs_init: mlxsw_sp_port_fids_fini(mlxsw_sp_port); @@ -3061,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_nve_fini(mlxsw_sp_port); mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -3795,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_afa_init; } + err = mlxsw_sp_nve_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n"); + goto err_nve_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3841,6 +3857,8 @@ err_acl_init: err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_nve_fini(mlxsw_sp); +err_nve_init: mlxsw_sp_afa_fini(mlxsw_sp); err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3873,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3887,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3900,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_nve_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -4566,6 +4587,41 @@ 
static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); } +static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev) +{ + unsigned int num_vxlans = 0; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + num_vxlans++; + } + + return num_vxlans > 1; +} + +static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + if (br_multicast_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (br_vlan_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); + return false; + } + + return true; +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, struct net_device *dev, unsigned long event, void *ptr) @@ -4595,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4752,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4898,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } +static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *cu_info; + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + extack = netdev_notifier_info_to_extack(info); + + switch (event) { + case NETDEV_CHANGEUPPER: + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); + upper_dev = cu_info->upper_dev; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; + if (cu_info->linking) { + if (!netif_running(dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, + dev, extack); + } else { + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + } + break; + case NETDEV_PRE_UP: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, + extack); + case NETDEV_DOWN: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if 
(!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + break; + } + + return 0; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4914,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, } mlxsw_sp_span_respin(mlxsw_sp); + if (netif_is_vxlan(dev)) + err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 1f68ac2a20f4..0875a79cbe7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -16,6 +16,7 @@ #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> +#include <net/vxlan.h> #include "port.h" #include "core.h" @@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id { struct mlxsw_sp_port; struct mlxsw_sp_rif; struct mlxsw_sp_span_entry; +enum mlxsw_sp_l3proto; +union mlxsw_sp_l3addr; struct mlxsw_sp_upper { struct net_device *dev; @@ -113,9 +116,11 @@ struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; struct mlxsw_sp_kvdl; +struct mlxsw_sp_nve; struct mlxsw_sp_kvdl_ops; struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_tcam_ops; +struct mlxsw_sp_nve_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -132,6 +137,7 @@ struct mlxsw_sp { struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct mlxsw_sp_kvdl *kvdl; + struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; @@ -146,6 +152,7 @@ struct mlxsw_sp { const struct mlxsw_afk_ops *afk_ops; const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; + const struct mlxsw_sp_nve_ops **nve_ops_arr; }; static inline struct mlxsw_sp_upper * @@ -235,6 +242,25 @@ struct mlxsw_sp_port { struct mlxsw_sp_acl_block *eg_acl_block; }; +static inline struct net_device * +mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + return dev; + } + + return NULL; +} + +static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev); +} + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *br_dev); bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -431,6 +464,15 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + 
enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index); +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip); +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { @@ -679,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni); +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni); +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid); +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni); +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid); int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, bool member); @@ -697,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index); struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp); @@ -742,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; /* spectrum2_mr_tcam.c */ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops; +/* spectrum_nve.c */ +enum mlxsw_sp_nve_type { + MLXSW_SP_NVE_TYPE_VXLAN, +}; + +struct mlxsw_sp_nve_params { + enum mlxsw_sp_nve_type type; + __be32 vni; + const struct net_device *dev; +}; + +extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[]; +extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[]; + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp); +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr); +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack); +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 715d24ff937e..a3db033d7399 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -6,6 +6,7 @@ #include <linux/if_vlan.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> 
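/* Editorial aside, not part of the patch: the spectrum_fid.c hunks below
 * start keying FIDs by VXLAN Network Identifier with a kernel rhashtable.
 * A minimal, self-contained sketch of the same pattern follows; the item
 * type and helper names are hypothetical, only the rhashtable_* calls and
 * the rhashtable_params layout match the real API used by this file.
 */

#include <linux/rhashtable.h>
#include <linux/stddef.h>	/* offsetof(), sizeof_field() */
#include <linux/types.h>

struct item {
	u32 key;			/* lookup key, like fid->vni below */
	struct rhash_head node;		/* hash table linkage */
};

static const struct rhashtable_params item_ht_params = {
	.key_len = sizeof_field(struct item, key),
	.key_offset = offsetof(struct item, key),
	.head_offset = offsetof(struct item, node),
};

/* Returns 0 on success and -EEXIST if the key is already present, which
 * is the property mlxsw_sp_fid_vni_set() relies on to reject duplicate
 * VNIs.
 */
static int item_add(struct rhashtable *ht, struct item *item)
{
	return rhashtable_lookup_insert_fast(ht, &item->node,
					     item_ht_params);
}

static struct item *item_find(struct rhashtable *ht, u32 key)
{
	return rhashtable_lookup_fast(ht, &key, item_ht_params);
}

/* The table itself is created once with rhashtable_init(&ht,
 * &item_ht_params) and torn down with rhashtable_destroy(&ht), mirroring
 * what this patch adds to mlxsw_sp_fids_init()/mlxsw_sp_fids_fini().
 */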
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 
 #include "spectrum.h"
@@ -14,6 +15,7 @@ struct mlxsw_sp_fid_family;
 
 struct mlxsw_sp_fid_core {
+	struct rhashtable vni_ht;
 	struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
 	unsigned int *port_fid_mappings;
 };
@@ -24,6 +26,12 @@ struct mlxsw_sp_fid {
 	unsigned int ref_count;
 	u16 fid_index;
 	struct mlxsw_sp_fid_family *fid_family;
+
+	struct rhash_head vni_ht_node;
+	__be32 vni;
+	u32 nve_flood_index;
+	u8 vni_valid:1,
+	   nve_flood_index_valid:1;
 };
 
 struct mlxsw_sp_fid_8021q {
@@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d {
 	int br_ifindex;
 };
 
+static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
+	.key_len = sizeof_field(struct mlxsw_sp_fid, vni),
+	.key_offset = offsetof(struct mlxsw_sp_fid, vni),
+	.head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node),
+};
+
 struct mlxsw_sp_flood_table {
 	enum mlxsw_sp_flood_type packet_type;
 	enum mlxsw_reg_sfgc_bridge_type bridge_type;
@@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops {
 			    struct mlxsw_sp_port *port, u16 vid);
 	void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
 			       struct mlxsw_sp_port *port, u16 vid);
+	int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+	void (*vni_clear)(struct mlxsw_sp_fid *fid);
+	int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
+				   u32 nve_flood_index);
+	void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
 };
 
 struct mlxsw_sp_fid_family {
@@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
 	[MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
 };
 
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+						__be32 vni)
+{
+	struct mlxsw_sp_fid *fid;
+
+	fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni,
+				     mlxsw_sp_fid_vni_ht_params);
+	if (fid)
+		fid->ref_count++;
+
+	return fid;
+}
+
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni)
+{
+	if (!fid->vni_valid)
+		return -EINVAL;
+
+	*vni = fid->vni;
+
+	return 0;
+}
+
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+				     u32 nve_flood_index)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	int err;
+
+	if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+		return -EINVAL;
+
+	err = ops->nve_flood_index_set(fid, nve_flood_index);
+	if (err)
+		return err;
+
+	fid->nve_flood_index = nve_flood_index;
+	fid->nve_flood_index_valid = true;
+
+	return 0;
+}
+
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+	if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+		return;
+
+	fid->nve_flood_index_valid = false;
+	ops->nve_flood_index_clear(fid);
+}
+
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
+{
+	return fid->nve_flood_index_valid;
+}
+
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+	struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+	const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+	struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+	int err;
+
+	if (WARN_ON(!ops->vni_set || fid->vni_valid))
+		return -EINVAL;
+
+	fid->vni = vni;
+	err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
+					    &fid->vni_ht_node,
+					    mlxsw_sp_fid_vni_ht_params);
+	if (err)
+		return err;
+
+	err = ops->vni_set(fid, vni);
+	if (err)
+		goto err_vni_set;
+
+	fid->vni_valid = true;
+
+	return 0;
+
+err_vni_set:
+
rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + return err; +} + +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + + if (WARN_ON(!ops->vni_clear || !fid->vni_valid)) + return; + + fid->vni_valid = false; + ops->vni_clear(fid); + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); +} + +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->vni_valid; +} + static const struct mlxsw_sp_flood_table * mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type) @@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } +static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + __be32 vni, bool vni_valid, u32 nve_flood_index, + bool nve_flood_index_valid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index, + 0); + mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid); + mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni)); + mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid); + mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, u16 vid, bool valid) { @@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) { + if (fid->vni_valid) + mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); } @@ -531,6 +678,41 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, mlxsw_sp_port->local_port, vid, false); } +static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni, + true, fid->nve_flood_index, + fid->nve_flood_index_valid); +} + +static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false, + fid->nve_flood_index, fid->nve_flood_index_valid); +} + +static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, + fid->vni, fid->vni_valid, nve_flood_index, + true); +} + +static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni, + fid->vni_valid, 0, false); +} + static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .setup = mlxsw_sp_fid_8021d_setup, .configure = mlxsw_sp_fid_8021d_configure, @@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .flood_index = mlxsw_sp_fid_8021d_flood_index, .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, + .vni_set = 
mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, }; static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { @@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, }; -static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_fid_type type, - const void *arg) +static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) { struct mlxsw_sp_fid_family *fid_family; struct mlxsw_sp_fid *fid; - u16 fid_index; - int err; fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; list_for_each_entry(fid, &fid_family->fids_list, list) { @@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, return fid; } + return NULL; +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + u16 fid_index; + int err; + + fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg); + if (fid) + return fid; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; fid = kzalloc(fid_family->fid_size, GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); } +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, + &br_ifindex); +} + struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; mlxsw_sp->fid_core = fid_core; + err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params); + if (err) + goto err_rhashtable_init; + fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), GFP_KERNEL); if (!fid_core->port_fid_mappings) { @@ -944,6 +1156,8 @@ err_fid_ops_register: } kfree(fid_core->port_fid_mappings); err_alloc_port_fid_mappings: + rhashtable_destroy(&fid_core->vni_ht); +err_rhashtable_init: kfree(fid_core); return err; } @@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_core->fid_family_arr[i]); kfree(fid_core->port_fid_mappings); + rhashtable_destroy(&fid_core->vni_ht); kfree(fid_core); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c new file mode 100644 index 000000000000..ad06d9969bc1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -0,0 +1,982 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> + +#include "reg.h" +#include "spectrum.h" +#include "spectrum_nve.h" + +const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops, +}; + +const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops, +}; + +struct mlxsw_sp_nve_mc_entry; +struct mlxsw_sp_nve_mc_record; +struct mlxsw_sp_nve_mc_list; + +struct mlxsw_sp_nve_mc_record_ops { + enum mlxsw_reg_tnumt_record_type type; + int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); + void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry); + void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index); + bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); +}; + +struct mlxsw_sp_nve_mc_list_key { + u16 fid_index; +}; + +struct mlxsw_sp_nve_mc_ipv6_entry { + struct in6_addr addr6; + u32 addr6_kvdl_index; +}; + +struct mlxsw_sp_nve_mc_entry { + union { + __be32 addr4; + struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry; + }; + u8 valid:1; +}; + +struct mlxsw_sp_nve_mc_record { + struct list_head list; + enum mlxsw_sp_l3proto proto; + unsigned int num_entries; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_nve_mc_list *mc_list; + const struct mlxsw_sp_nve_mc_record_ops *ops; + u32 kvdl_index; + struct mlxsw_sp_nve_mc_entry entries[0]; +}; + +struct mlxsw_sp_nve_mc_list { + struct list_head records_list; + struct rhash_head ht_node; + struct mlxsw_sp_nve_mc_list_key key; +}; + +static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = { + .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key), + .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key), + .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node), +}; + +static int +mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + mc_entry->addr4 = addr->addr4; + + return 0; +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip = be32_to_cpu(mc_entry->addr4); + + mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip); +} + +static bool +mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return mc_entry->addr4 == addr->addr4; +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv4_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set, + .entry_compare = 
&mlxsw_sp_nve_mc_record_ipv4_entry_compare, +}; + +static int +mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + WARN_ON(1); + + return -EINVAL; +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index; + + mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr); +} + +static bool +mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6); +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv6_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare, +}; + +static const struct mlxsw_sp_nve_mc_record_ops * +mlxsw_sp_nve_mc_record_ops_arr[] = { + [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops, + [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops, +}; + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + return rhashtable_lookup_fast(&nve->mc_list_ht, key, + mlxsw_sp_nve_mc_list_ht_params); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL); + if (!mc_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mc_list->records_list); + mc_list->key = *key; + + err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_insert; + + return mc_list; + +err_rhashtable_insert: + kfree(mc_list); + return ERR_PTR(err); +} + +static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + WARN_ON(!list_empty(&mc_list->records_list)); + kfree(mc_list); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve_mc_list *mc_list; + + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key); + if (mc_list) + return mc_list; + + return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key); +} + +static void +mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + if (!list_empty(&mc_list->records_list)) + return; + mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) 
+{ + unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto]; + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * + sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); + if (!mc_record) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + &mc_record->kvdl_index); + if (err) + goto err_kvdl_alloc; + + mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto]; + mc_record->mlxsw_sp = mlxsw_sp; + mc_record->mc_list = mc_list; + mc_record->proto = proto; + list_add_tail(&mc_record->list, &mc_list->records_list); + + return mc_record; + +err_kvdl_alloc: + kfree(mc_record); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + + list_del(&mc_record->list); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + mc_record->kvdl_index); + WARN_ON(mc_record->num_entries); + kfree(mc_record); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) { + unsigned int num_entries = mc_record->num_entries; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + if (mc_record->proto == proto && + num_entries < nve->num_max_mc_entries[proto]) + return mc_record; + } + + return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto); +} + +static void +mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record) +{ + if (mc_record->num_entries != 0) + return; + + mlxsw_sp_nve_mc_record_destroy(mc_record); +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + if (mc_record->entries[i].valid) + continue; + return &mc_record->entries[i]; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record) +{ + enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type; + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + char tnumt_pl[MLXSW_REG_TNUMT_LEN]; + unsigned int num_max_entries; + unsigned int num_entries = 0; + u32 next_kvdl_index = 0; + bool next_valid = false; + int i; + + if (!list_is_last(&mc_record->list, &mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + next_kvdl_index = next_record->kvdl_index; + next_valid = true; + } + + mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE, + mc_record->kvdl_index, next_valid, + next_kvdl_index, mc_record->num_entries); + + num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl, + num_entries++); + } + + WARN_ON(num_entries != mc_record->num_entries); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl); +} + +static bool +mlxsw_sp_nve_mc_record_is_first(struct 
mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp_nve_mc_record *first_record; + + first_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mc_record == first_record; +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + if (mc_record->ops->entry_compare(mc_record, mc_entry, addr)) + return mc_entry; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_entry *mc_entry = NULL; + int err; + + mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record); + if (WARN_ON(!mc_entry)) + return -EINVAL; + + err = mc_record->ops->entry_add(mc_record, mc_entry, addr); + if (err) + return err; + mc_record->num_entries++; + mc_entry->valid = true; + + err = mlxsw_sp_nve_mc_record_refresh(mc_record); + if (err) + goto err_record_refresh; + + /* If this is a new record and not the first one, then we need to + * update the next pointer of the previous entry + */ + if (mc_record->num_entries != 1 || + mlxsw_sp_nve_mc_record_is_first(mc_record)) + return 0; + + err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list)); + if (err) + goto err_prev_record_refresh; + + return 0; + +err_prev_record_refresh: +err_record_refresh: + mc_entry->valid = false; + mc_record->num_entries--; + mc_record->ops->entry_del(mc_record, mc_entry); + return err; +} + +static void +mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + + mc_entry->valid = false; + mc_record->num_entries--; + + /* When the record continues to exist we only need to invalidate + * the requested entry + */ + if (mc_record->num_entries != 0) { + mlxsw_sp_nve_mc_record_refresh(mc_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the record needs to be deleted, but it is not the first, + * then we need to make sure that the previous record no longer + * points to it. 
Remove deleted record from the list to reflect + * that and then re-add it at the end, so that it could be + * properly removed by the record destruction code + */ + if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) { + struct mlxsw_sp_nve_mc_record *prev_record; + + prev_record = list_prev_entry(mc_record, list); + list_del(&mc_record->list); + mlxsw_sp_nve_mc_record_refresh(prev_record); + list_add_tail(&mc_record->list, &mc_list->records_list); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the first record needs to be deleted, but the list is not + * singular, then the second record needs to be written in the + * first record's address, as this address is stored as a property + * of the FID + */ + if (mlxsw_sp_nve_mc_record_is_first(mc_record) && + !list_is_singular(&mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + swap(mc_record->kvdl_index, next_record->kvdl_index); + mlxsw_sp_nve_mc_record_refresh(next_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* This is the last case where the last remaining record needs to + * be deleted. Simply delete the entry + */ + mc_record->ops->entry_del(mc_record, mc_entry); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + struct mlxsw_sp_nve_mc_entry **mc_entry) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry(mc_record, &mc_list->records_list, list) { + if (mc_record->proto != proto) + continue; + + *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr); + if (*mc_entry) + return mc_record; + } + + return NULL; +} + +static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto); + if (IS_ERR(mc_record)) + return PTR_ERR(mc_record); + + err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr); + if (err) + goto err_ip_add; + + return 0; + +err_ip_add: + mlxsw_sp_nve_mc_record_put(mc_record); + return err; +} + +static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, + &mc_entry); + if (WARN_ON(!mc_record)) + return; + + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static int +mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record in the list is a property of + * the FID and we never change it. 
It only needs to be set when + * a new list is created + */ + if (mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return 0; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index); +} + +static void +mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record needs to be invalidated only when + * the last record is about to be removed + */ + if (!list_is_singular(&mc_list->records_list)) + return; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + if (mc_record->num_entries != 1) + return; + + return mlxsw_sp_fid_nve_flood_index_clear(fid); +} + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key); + if (IS_ERR(mc_list)) + return PTR_ERR(mc_list); + + err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr); + if (err) + goto err_add_ip; + + err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list); + if (err) + goto err_fid_flood_index_set; + + return 0; + +err_fid_flood_index_set: + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); +err_add_ip: + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); + return err; +} + +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list); + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +static void +mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i]; + + if (!mc_entry->valid) + continue; + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + } + + WARN_ON(mc_record->num_entries); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_nve_mc_record *mc_record, *tmp; + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + if (!mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return; + + mlxsw_sp_fid_nve_flood_index_clear(fid); + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list) + mlxsw_sp_nve_mc_record_delete(mc_record); + + WARN_ON(!list_empty(&mc_list->records_list)); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0); + + return 
mlxsw_sp->nve->tunnel_index; +} + +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_config *config = &nve->config; + + if (nve->num_nve_tunnels && + config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 && + config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id) + return true; + + return false; +} + +static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + int err; + + if (nve->num_nve_tunnels++ != 0) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &nve->tunnel_index); + if (err) + goto err_kvdl_alloc; + + ops = nve->nve_ops_arr[config->type]; + err = ops->init(nve, config); + if (err) + goto err_ops_init; + + return 0; + +err_ops_init: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); +err_kvdl_alloc: + nve->num_nve_tunnels--; + return err; +} + +static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + + ops = nve->nve_ops_arr[nve->config.type]; + + if (mlxsw_sp->nve->num_nve_tunnels == 1) { + ops->fini(nve); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); + } + nve->num_nve_tunnels--; +} + +static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + struct mlxsw_sp_nve_config config; + int err; + + ops = nve->nve_ops_arr[params->type]; + + if (!ops->can_offload(nve, params->dev, extack)) + return -EOPNOTSUPP; + + memset(&config, 0, sizeof(config)); + ops->nve_config(nve, params->dev, &config); + if (nve->num_nve_tunnels && + memcmp(&config, &nve->config, sizeof(config))) { + NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel"); + return err; + } + + err = mlxsw_sp_fid_vni_set(fid, params->vni); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID"); + goto err_fid_vni_set; + } + + nve->config = config; + + return 0; + +err_fid_vni_set: + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + u16 fid_index = mlxsw_sp_fid_index(fid); + + mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); + mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); + mlxsw_sp_fid_vni_clear(fid); + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); +} + +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char tnqdr_pl[MLXSW_REG_TNQDR_LEN]; + + mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl); +} + +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ +} + +static int 
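/* Tunnel lifetime above is reference counted via num_nve_tunnels: only
 * the first mlxsw_sp_nve_tunnel_init() call allocates the KVDL entry
 * and runs ops->init(), and only the last mlxsw_sp_nve_tunnel_fini()
 * call tears both down. A hypothetical two-FID sequence, as a sketch:
 *
 *   mlxsw_sp_nve_fid_enable(mlxsw_sp, fid1, &params, extack);
 *           num_nve_tunnels 0 -> 1, KVDL alloc + ops->init()
 *   mlxsw_sp_nve_fid_enable(mlxsw_sp, fid2, &params, extack);
 *           num_nve_tunnels 1 -> 2, config memcmp() check only
 *   mlxsw_sp_nve_fid_disable(mlxsw_sp, fid2);
 *           num_nve_tunnels 2 -> 1, tunnel stays up
 *   mlxsw_sp_nve_fid_disable(mlxsw_sp, fid1);
 *           num_nve_tunnels 1 -> 0, ops->fini() + KVDL free
 */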
mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp) +{ + char tnqcr_pl[MLXSW_REG_TNQCR_LEN]; + + mlxsw_reg_tnqcr_pack(tnqcr_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl); +} + +static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + char tneem_pl[MLXSW_REG_TNEEM_LEN]; + int err; + + mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem), + tneem_pl); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tndem_pl[MLXSW_REG_TNDEM_LEN]; + bool trap_en, set_ce = false; + u8 new_inner_ecn; + + trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); +} + +static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + int j; + + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + int err; + + err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp); + if (err) + return err; + + return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp); +} + +static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6)) + return -EIO; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max; + + return 0; +} + +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve; + int err; + + nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL); + if (!nve) + return -ENOMEM; + mlxsw_sp->nve = nve; + nve->mlxsw_sp = mlxsw_sp; + nve->nve_ops_arr = mlxsw_sp->nve_ops_arr; + + err = rhashtable_init(&nve->mc_list_ht, + &mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_init; + + err = mlxsw_sp_nve_qos_init(mlxsw_sp); + if (err) + goto err_nve_qos_init; + + err = mlxsw_sp_nve_ecn_init(mlxsw_sp); + if (err) + goto err_nve_ecn_init; + + err = mlxsw_sp_nve_resources_query(mlxsw_sp); + if (err) + goto err_nve_resources_query; + + return 0; + +err_nve_resources_query: +err_nve_ecn_init: +err_nve_qos_init: + rhashtable_destroy(&nve->mc_list_ht); +err_rhashtable_init: + mlxsw_sp->nve = NULL; + kfree(nve); + return err; +} + +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels); + rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht); + kfree(mlxsw_sp->nve); + mlxsw_sp->nve = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h new file mode 100644 index 000000000000..4cc3297e13d6 --- /dev/null +++
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_NVE_H +#define _MLXSW_SPECTRUM_NVE_H + +#include <linux/netlink.h> +#include <linux/rhashtable.h> + +#include "spectrum.h" + +struct mlxsw_sp_nve_config { + enum mlxsw_sp_nve_type type; + u8 ttl; + u8 learning_en:1; + __be16 udp_dport; + __be32 flowlabel; + u32 ul_tb_id; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; +}; + +struct mlxsw_sp_nve { + struct mlxsw_sp_nve_config config; + struct rhashtable mc_list_ht; + struct mlxsw_sp *mlxsw_sp; + const struct mlxsw_sp_nve_ops **nve_ops_arr; + unsigned int num_nve_tunnels; /* Protected by RTNL */ + unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; + u32 tunnel_index; +}; + +struct mlxsw_sp_nve_ops { + enum mlxsw_sp_nve_type type; + bool (*can_offload)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack); + void (*nve_config)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config); + int (*init)(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config); + void (*fini)(struct mlxsw_sp_nve *nve); +}; + +extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops; +extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c new file mode 100644 index 000000000000..d21c7be5b1c9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <net/vxlan.h> + +#include "reg.h" +#include "spectrum_nve.h" + +/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B) + * + * In the worst case - where we have a VLAN tag on the outer Ethernet + * header and IPv6 in overlay and underlay - we need to parse 128 bytes + */ +#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128 +#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96 + +#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX + +static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + if (cfg->saddr.sa.sa_family != AF_INET) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported"); + return false; + } + + if (vxlan_addr_multicast(&cfg->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported"); + return false; + } + + if (vxlan_addr_any(&cfg->saddr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified"); + return false; + } + + if (cfg->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported"); + return false; + } + + if (cfg->port_min || cfg->port_max) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported"); + return false; + } + + if (cfg->tos != 1) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_TTL_INHERIT) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_LEARN) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported"); + return false; + } + + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported"); + return false; + } + + if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag"); + return false; + } + + if (cfg->ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0"); + return false; + } + + if (cfg->label != 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0"); + return false; + } + + return true; +} + +static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + config->type = MLXSW_SP_NVE_TYPE_VXLAN; + config->ttl = cfg->ttl; + config->flowlabel = cfg->label; + config->learning_en = cfg->flags & VXLAN_F_LEARN ? 
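/* learning_en below simply reflects VXLAN_F_LEARN into the 1-bit field
 * declared as "u8 learning_en:1" in struct mlxsw_sp_nve_config. Note
 * that mlxsw_sp1_nve_vxlan_can_offload() above rejects VXLAN_F_LEARN
 * outright, so whenever this config is actually offloaded on
 * Spectrum-1 the expression evaluates to 0.
 */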
1 : 0; + config->ul_tb_id = RT_TABLE_MAIN; + config->ul_proto = MLXSW_SP_L3_PROTO_IPV4; + config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr; + config->udp_dport = cfg->dst_port; +} + +static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, + unsigned int parsing_depth, + __be16 udp_dport) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + + mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport)); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); +} + +static int +mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_vr_id; + u8 udp_sport; + int err; + + err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, + &ul_vr_id); + if (err) + return err; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, + config->ttl); + /* VxLAN driver's default UDP source port range is 32768 (0x8000) + * to 60999 (0xee47). Set the upper 8 bits of the UDP source port + * to a random number between 0x80 and 0xee + */ + get_random_bytes(&udp_sport, sizeof(udp_sport)); + udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); + mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_nve_parsing_set(mlxsw_sp, + MLXSW_SP_NVE_VXLAN_PARSING_DEPTH, + config->udp_dport); + if (err) + return err; + + err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); + return err; +} + +static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); +} + +const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init 
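/* The source-port math above, worked through: with udp_sport uniform
 * over [0, 255], (udp_sport % (0xee - 0x80 + 1)) + 0x80 lands in
 * [0x80, 0xee]. Used as the upper byte of the UDP source port this
 * spans 0x8000 (32768) up to 0xeeff (61183), approximating the VXLAN
 * driver's default range of 32768..60999 (0xee47) as closely as an
 * 8-bit prefix allows.
 */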
= mlxsw_sp1_nve_vxlan_init, + .fini = mlxsw_sp1_nve_vxlan_fini, +}; + +static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + return false; +} + +static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ +} + +const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp2_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp2_nve_vxlan_init, + .fini = mlxsw_sp2_nve_vxlan_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2ab9cf25a08a..9e9bb57134f2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type { * encapsulating entries.) */ MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, + MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; struct mlxsw_sp_nexthop_group; @@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, return NULL; } +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id) +{ + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return -ESRCH; + *vr_id = vr->id; + + return 0; +} + static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { @@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } +static struct mlxsw_sp_fib_entry * +mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + enum mlxsw_sp_fib_entry_type type) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node; + unsigned char addr_prefix_len; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + const void *addrp; + size_t addr_len; + u32 addr4; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, proto); + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr4 = be32_to_cpu(addr->addr4); + addrp = &addr4; + addr_len = 4; + addr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, + addr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != type) + return NULL; + + return fib_entry; +} + /* Given an IPIP entry, find the corresponding decap route. 
*/ static struct mlxsw_sp_fib_entry * mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, @@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return 0; } +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + /* It is valid to create a tunnel with a local IP and only later + * assign this IP address to a local interface + */ + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return 0; + + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_update; + + return 0; + +err_fib_entry_update: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + return err; +} + +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; default: return false; @@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) int i; if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; return; } @@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, fib_entry->decap.tunnel_index); } +static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); } return -EINVAL; } @@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); struct net_device 
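/* Decap route state transitions, summarized from the two helpers above
 * (a sketch inferred from this patch):
 *
 *   local TRAP route exists, tunnel created -> promote to NVE_DECAP
 *   tunnel removed                          -> demote back to TRAP
 *
 * When the tunnel is created before any matching local route exists,
 * promotion is a no-op; the route is instead classified as NVE_DECAP
 * directly in mlxsw_sp_fib4_entry_type_set() below once it is added.
 */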
*dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry, ipip_entry); } + if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, + dip.addr4)) { + u32 t_index; + + t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); + fib_entry->decap.tunnel_index = t_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + return 0; + } /* fall through */ case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index fa16ad2c6a50..bc60d7a8b49d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/netlink.h> #include <net/switchdev.h> +#include <net/vxlan.h> #include "spectrum_span.h" #include "spectrum_switchdev.h" @@ -83,9 +84,19 @@ struct mlxsw_sp_bridge_ops { void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); + int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); + void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, u16 vid); + struct mlxsw_sp_fid * + (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid); + u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid); }; static int @@ -1236,6 +1247,51 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } +static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + bool adding, bool dynamic) +{ + enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto; + char *sfd_pl; + u8 num_rec; + u32 uip; + int err; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + uip = be32_to_cpu(addr->addr4); + sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return -EOPNOTSUPP; + } + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0, + mlxsw_sp_sfd_rec_policy(dynamic), mac, fid, + MLXSW_REG_SFD_REC_ACTION_NOP, uip, + sfd_proto); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, @@ -1949,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); } +static int +mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + WARN_ON(1); + return -EINVAL; +} + +static void +mlxsw_sp_bridge_8021q_vxlan_leave(struct 
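/* VXLAN offload is only implemented for VLAN-unaware (802.1D) bridges
 * at this point in the series: the 802.1Q vxlan_join() above warns and
 * returns -EINVAL, the vxlan_leave() being defined here is an empty
 * stub, and the 802.1Q fid_lookup() likewise warns. The working
 * callbacks follow in the 802.1D ops further down.
 */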
mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) @@ -1958,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + WARN_ON(1); + return NULL; +} + +static u16 +mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_vid(fid); +} + static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { .port_join = mlxsw_sp_bridge_8021q_port_join, .port_leave = mlxsw_sp_bridge_8021q_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, }; static bool @@ -2025,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); } +static int +mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + }; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (!fid) + return -EINVAL; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return -EINVAL; + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, ¶ms, extack); + if (err) + goto err_nve_fid_enable; + + /* The tunnel port does not hold a reference on the FID. 
Only + * local ports and the router port + */ + mlxsw_sp_fid_put(fid); + + return 0; + +err_nve_fid_enable: + mlxsw_sp_fid_put(fid); + return err; +} + +static void +mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct mlxsw_sp_fid *fid; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (WARN_ON(!fid)) + return; + + /* If the VxLAN device is down, then the FID does not have a VNI */ + if (!mlxsw_sp_fid_vni_is_set(fid)) + goto out; + + mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); +out: + mlxsw_sp_fid_put(fid); +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct net_device *vxlan_dev; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) + return fid; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return fid; + + vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev); + if (!vxlan_dev) + return fid; + + if (!netif_running(vxlan_dev)) + return fid; + + err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL); + if (err) + goto err_vxlan_join; + + return fid; + +err_vxlan_join: + mlxsw_sp_fid_put(fid); + return ERR_PTR(err); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + /* The only valid VLAN for a VLAN-unaware bridge is 0 */ + if (vid) + return NULL; + + return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); +} + +static u16 +mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return 0; } static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { .port_join = mlxsw_sp_bridge_8021d_port_join, .port_leave = mlxsw_sp_bridge_8021d_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021d_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021d_fid_vid, }; int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2087,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); } +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack); +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return; + + bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev); +} + static void mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, - struct 
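/* mlxsw_sp_bridge_8021d_fid_get() above also performs an implicit
 * VXLAN join: when the FID is created and carries no VNI yet, a
 * running VXLAN upper of the bridge is looked up and joined on the
 * spot. This covers the ordering where the VXLAN device was enslaved
 * to the bridge before any switch port joined it.
 */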
net_device *dev) + struct net_device *dev, bool offloaded) { struct switchdev_notifier_fdb_info info; info.addr = mac; info.vid = vid; + info.offloaded = offloaded; call_switchdev_notifiers(type, dev, &info.info); } @@ -2147,7 +2372,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2207,7 +2432,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2283,11 +2508,126 @@ out: struct mlxsw_sp_switchdev_event_work { struct work_struct work; - struct switchdev_notifier_fdb_info fdb_info; + union { + struct switchdev_notifier_fdb_info fdb_info; + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + }; struct net_device *dev; unsigned long event; }; +static void +mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, + enum mlxsw_sp_l3proto *proto, + union mlxsw_sp_l3addr *addr) +{ + if (vxlan_addr->sa.sa_family == AF_INET) { + addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; + *proto = MLXSW_SP_L3_PROTO_IPV4; + } else { + addr->addr6 = vxlan_addr->sin6.sin6_addr; + *proto = MLXSW_SP_L3_PROTO_IPV6; + } +} + +static void +mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; + struct net_device *dev = switchdev_work->dev; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + int err; + + fdb_info = &switchdev_work->fdb_info; + err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info); + if (err) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip, + &proto, &addr); + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, true, false); + if (err) + return; + vxlan_fdb_info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info.eth_addr, + fdb_info->vid, dev, true); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, false, + false); + vxlan_fdb_info.offloaded = false; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + break; + } +} + +static void +mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fid *fid; + __be32 vni; + int err; + + if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE && + switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE) + return; + + if (!switchdev_work->fdb_info.added_by_user) + return; + + if (!netif_running(dev)) + return; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return; + if (!netif_is_bridge_master(br_dev)) + 
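/* The event handled here was queued from a notifier running under
 * rcu_read_lock() and is only now processed under RTNL, so the device
 * topology may have changed in between; hence every precondition -
 * device up, bridge master present, mlxsw lower present, bridge known -
 * is re-validated before the FID lookup below.
 */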
return; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return; + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = bridge_device->ops->fid_lookup(bridge_device, + switchdev_work->fdb_info.vid); + if (!fid) + return; + + err = mlxsw_sp_fid_vni(fid, &vni); + if (err) + goto out; + + mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid, + vni); + +out: + mlxsw_sp_fid_put(fid); +} + static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) { struct mlxsw_sp_switchdev_event_work *switchdev_work = @@ -2298,6 +2638,11 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) int err; rtnl_lock(); + if (netif_is_vxlan(dev)) { + mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work); + goto out; + } + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); if (!mlxsw_sp_port) goto out; @@ -2312,7 +2657,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) break; mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, fdb_info->addr, - fdb_info->vid, dev); + fdb_info->vid, dev, true); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; @@ -2337,6 +2682,189 @@ out: dev_put(dev); } +static void +mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct net_device *br_dev; + struct mlxsw_sp_fid *fid; + u16 vid; + int err; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + br_dev = netdev_master_upper_dev_get(dev); + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr); + if (err) { + mlxsw_sp_fid_put(fid); + return; + } + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fid_put(fid); + return; + } + + /* The device has a single FDB table, whereas Linux has two - one + * in the bridge driver and another in the VxLAN driver. 
We only + * program an entry to the device if the MAC points to the VxLAN + * device in the bridge's FDB table + */ + vid = bridge_device->ops->fid_vid(bridge_device, fid); + if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev) + goto err_br_fdb_find; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, + &addr, true, false); + if (err) + goto err_fdb_tunnel_uc_op; + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, true); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_tunnel_uc_op: +err_br_fdb_find: + mlxsw_sp_fid_put(fid); +} + +static void +mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + u16 vid; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr); + mlxsw_sp_fid_put(fid); + return; + } + + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, &addr, + false, false); + vid = bridge_device->ops->fid_vid(bridge_device, fid); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, false); + + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + rtnl_lock(); + + if (!netif_running(dev)) + goto out; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + goto out; + if (!netif_is_bridge_master(br_dev)) + goto out; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work); + break; + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work); + break; + } + +out: + rtnl_unlock(); + kfree(switchdev_work); + dev_put(dev); +} + +static int +mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct switchdev_notifier_info *info) +{ + struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct vxlan_config *cfg = &vxlan->cfg; + + vxlan_fdb_info = container_of(info, + struct switchdev_notifier_vxlan_fdb_info, + info); + + if (vxlan_fdb_info->remote_port != cfg->dst_port) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_vni != cfg->vni) + return -EOPNOTSUPP; + if 
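/* Context for the all-zeros MAC handling above: in the VXLAN driver an
 * FDB entry keyed by 00:00:00:00:00:00 is a default-remote (flood)
 * entry, e.g. one installed with "bridge fdb append 00:00:00:00:00:00
 * dev vxlan0 dst 198.51.100.1" (device name and address hypothetical).
 * Such entries are therefore mapped to mlxsw_sp_nve_flood_ip_add() and
 * mlxsw_sp_nve_flood_ip_del() on the FID rather than to a unicast
 * tunnel FDB record.
 */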
(vxlan_fdb_info->vni != cfg->vni) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_ifindex) + return -EOPNOTSUPP; + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) + return -EOPNOTSUPP; + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) + return -EOPNOTSUPP; + + switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; + + return 0; +} + /* Called under rcu_read_lock() */ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -2346,6 +2874,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; struct switchdev_notifier_info *info = ptr; struct net_device *br_dev; + int err; /* Tunnel devices are not our uppers, so check their master instead */ br_dev = netdev_master_upper_dev_get_rcu(dev); @@ -2386,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, */ dev_hold(dev); break; + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_vxlan_fdb_event_work); + err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work, + info); + if (err) + goto err_vxlan_work_prepare; + dev_hold(dev); + break; default: kfree(switchdev_work); return NOTIFY_DONE; @@ -2395,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, return NOTIFY_DONE; +err_vxlan_work_prepare: err_addr_alloc: kfree(switchdev_work); return NOTIFY_BAD; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 9b8a17ee3cb3..3238b9ee42f3 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -133,9 +133,9 @@ static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot) { unsigned int val, timeout = 10; - /* Wait for the issued mac table command to be completed, or timeout. - * When the command read from ANA_TABLES_MACACCESS is - * MACACCESS_CMD_IDLE, the issued command completed successfully. + /* Wait for the issued vlan table command to be completed, or timeout. + * When the command read from ANA_TABLES_VLANACCESS is + * VLANACCESS_CMD_IDLE, the issued command completed successfully. 
*/ do { val = ocelot_read(ocelot, ANA_TABLES_VLANACCESS); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index f2b1938236fe..244dc261006e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -399,12 +399,14 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, switch (off) { case offsetof(struct iphdr, daddr): - set_ip_addr->ipv4_dst_mask = mask; - set_ip_addr->ipv4_dst = exact; + set_ip_addr->ipv4_dst_mask |= mask; + set_ip_addr->ipv4_dst &= ~mask; + set_ip_addr->ipv4_dst |= exact & mask; break; case offsetof(struct iphdr, saddr): - set_ip_addr->ipv4_src_mask = mask; - set_ip_addr->ipv4_src = exact; + set_ip_addr->ipv4_src_mask |= mask; + set_ip_addr->ipv4_src &= ~mask; + set_ip_addr->ipv4_src |= exact & mask; break; default: return -EOPNOTSUPP; @@ -418,11 +420,12 @@ nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, } static void -nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask, +nfp_fl_set_ip6_helper(int opcode_tag, u8 word, __be32 exact, __be32 mask, struct nfp_fl_set_ipv6_addr *ip6) { - ip6->ipv6[idx % 4].mask = mask; - ip6->ipv6[idx % 4].exact = exact; + ip6->ipv6[word].mask |= mask; + ip6->ipv6[word].exact &= ~mask; + ip6->ipv6[word].exact |= exact & mask; ip6->reserved = cpu_to_be16(0); ip6->head.jump_id = opcode_tag; @@ -435,6 +438,7 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, struct nfp_fl_set_ipv6_addr *ip_src) { __be32 exact, mask; + u8 word; /* We are expecting tcf_pedit to return a big endian value */ mask = (__force __be32)~tcf_pedit_mask(action, idx); @@ -443,17 +447,20 @@ nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, if (exact & ~mask) return -EOPNOTSUPP; - if (off < offsetof(struct ipv6hdr, saddr)) + if (off < offsetof(struct ipv6hdr, saddr)) { return -EOPNOTSUPP; - else if (off < offsetof(struct ipv6hdr, daddr)) - nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx, + } else if (off < offsetof(struct ipv6hdr, daddr)) { + word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, exact, mask, ip_src); - else if (off < offsetof(struct ipv6hdr, daddr) + - sizeof(struct in6_addr)) - nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx, + } else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) { + word = (off - offsetof(struct ipv6hdr, daddr)) / sizeof(exact); + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, exact, mask, ip_dst); - else + } else { return -EOPNOTSUPP; + } return 0; } @@ -511,7 +518,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, struct nfp_fl_set_eth set_eth; enum pedit_header_type htype; int idx, nkeys, err; - size_t act_size; + size_t act_size = 0; u32 offset, cmd; u8 ip_proto = 0; @@ -569,7 +576,9 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, act_size = sizeof(set_eth); memcpy(nfp_action, &set_eth, act_size); *a_len += act_size; - } else if (set_ip_addr.head.len_lw) { + } + if (set_ip_addr.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip_addr); memcpy(nfp_action, &set_ip_addr, act_size); *a_len += act_size; @@ -577,10 +586,12 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix IPv4 and TCP/UDP checksum. 
*/ *csum_updated |= TCA_CSUM_UPDATE_FLAG_IPV4HDR | nfp_fl_csum_l4_to_flag(ip_proto); - } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { + } + if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { /* TC compiles set src and dst IPv6 address as a single action, * the hardware requires this to be 2 separate actions. */ + nfp_action += act_size; act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; @@ -593,6 +604,7 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_dst.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip6_dst); memcpy(nfp_action, &set_ip6_dst, act_size); *a_len += act_size; @@ -600,13 +612,16 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow, /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); } else if (set_ip6_src.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_ip6_src); memcpy(nfp_action, &set_ip6_src, act_size); *a_len += act_size; /* Hardware will automatically fix TCP/UDP checksum. */ *csum_updated |= nfp_fl_csum_l4_to_flag(ip_proto); - } else if (set_tport.head.len_lw) { + } + if (set_tport.head.len_lw) { + nfp_action += act_size; act_size = sizeof(set_tport); memcpy(nfp_action, &set_tport, act_size); *a_len += act_size; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 30c926c4bc47..8e5bec04d1f9 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -4,6 +4,7 @@ #include <linux/etherdevice.h> #include <linux/inetdevice.h> #include <net/netevent.h> +#include <net/vxlan.h> #include <linux/idr.h> #include <net/dst_metadata.h> #include <net/arp.h> @@ -187,7 +188,7 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) return false; if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) return true; - if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan")) + if (netif_is_vxlan(netdev)) return true; return false; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 8b23d2848457..25382f8fbb70 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -19,34 +19,18 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/errno.h> -#include <linux/ioport.h> -#include <linux/crc32.h> -#include <linux/platform_device.h> -#include <linux/spinlock.h> -#include <linux/ethtool.h> -#include <linux/mii.h> #include <linux/clk.h> -#include <linux/workqueue.h> -#include <linux/netdevice.h> +#include <linux/crc32.h> #include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/phy.h> -#include <linux/dma-mapping.h> -#include <linux/of.h> +#include <linux/module.h> #include <linux/of_net.h> -#include <linux/types.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> -#include <linux/io.h> #include <mach/board.h> -#include <mach/platform.h> #include <mach/hardware.h> +#include <mach/platform.h> #define MODNAME "lpc-eth" #define DRV_VERSION "1.00" @@ -1257,18 +1241,19 @@ static const struct net_device_ops 
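/* Worked example for the nfp_fl_set_ip4()/nfp_fl_set_ip6_helper()
 * change in the hunks above: two pedit keys may target the same 32-bit
 * word, e.g. one rewriting the upper and one the lower 16 bits of the
 * IPv4 daddr. With plain assignment the second key discarded the
 * first; the accumulating form
 *
 *   mask_word  |= mask;
 *   exact_word &= ~mask;
 *   exact_word |= exact & mask;
 *
 * (names illustrative) lets each key touch only the bits its own mask
 * covers, so both rewrites survive in the single hardware action.
 */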
lpc_netdev_ops = { static int lpc_eth_drv_probe(struct platform_device *pdev) { - struct resource *res; - struct net_device *ndev; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; struct netdata_local *pldat; - struct phy_device *phydev; + struct net_device *ndev; dma_addr_t dma_handle; + struct resource *res; int irq, ret; u32 tmp; /* Setup network interface for RMII or MII mode */ tmp = __raw_readl(LPC32XX_CLKPWR_MACCLK_CTRL); tmp &= ~LPC32XX_CLKPWR_MACCTRL_PINS_MSK; - if (lpc_phy_interface_mode(&pdev->dev) == PHY_INTERFACE_MODE_MII) + if (lpc_phy_interface_mode(dev) == PHY_INTERFACE_MODE_MII) tmp |= LPC32XX_CLKPWR_MACCTRL_USE_MII_PINS; else tmp |= LPC32XX_CLKPWR_MACCTRL_USE_RMII_PINS; @@ -1278,7 +1263,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); if (!res || irq < 0) { - dev_err(&pdev->dev, "error getting resources.\n"); + dev_err(dev, "error getting resources.\n"); ret = -ENXIO; goto err_exit; } @@ -1286,12 +1271,12 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Allocate net driver data structure */ ndev = alloc_etherdev(sizeof(struct netdata_local)); if (!ndev) { - dev_err(&pdev->dev, "could not allocate device.\n"); + dev_err(dev, "could not allocate device.\n"); ret = -ENOMEM; goto err_exit; } - SET_NETDEV_DEV(ndev, &pdev->dev); + SET_NETDEV_DEV(ndev, dev); pldat = netdev_priv(ndev); pldat->pdev = pdev; @@ -1303,9 +1288,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) ndev->irq = irq; /* Get clock for the device */ - pldat->clk = clk_get(&pdev->dev, NULL); + pldat->clk = clk_get(dev, NULL); if (IS_ERR(pldat->clk)) { - dev_err(&pdev->dev, "error getting clock.\n"); + dev_err(dev, "error getting clock.\n"); ret = PTR_ERR(pldat->clk); goto err_out_free_dev; } @@ -1318,14 +1303,14 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Map IO space */ pldat->net_base = ioremap(res->start, resource_size(res)); if (!pldat->net_base) { - dev_err(&pdev->dev, "failed to map registers\n"); + dev_err(dev, "failed to map registers\n"); ret = -ENOMEM; goto err_out_disable_clocks; } ret = request_irq(ndev->irq, __lpc_eth_interrupt, 0, ndev->name, ndev); if (ret) { - dev_err(&pdev->dev, "error requesting interrupt.\n"); + dev_err(dev, "error requesting interrupt.\n"); goto err_out_iounmap; } @@ -1339,7 +1324,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) sizeof(struct txrx_desc_t) + sizeof(struct rx_status_t)); pldat->dma_buff_base_v = 0; - if (use_iram_for_net(&pldat->pdev->dev)) { + if (use_iram_for_net(dev)) { dma_handle = LPC32XX_IRAM_BASE; if (pldat->dma_buff_size <= lpc32xx_return_iram_size()) pldat->dma_buff_base_v = @@ -1350,7 +1335,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) } if (pldat->dma_buff_base_v == 0) { - ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) goto err_out_free_irq; @@ -1359,7 +1344,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Allocate a chunk of memory for the DMA ethernet buffers and descriptors */ pldat->dma_buff_base_v = - dma_alloc_coherent(&pldat->pdev->dev, + dma_alloc_coherent(dev, pldat->dma_buff_size, &dma_handle, GFP_KERNEL); if (pldat->dma_buff_base_v == NULL) { @@ -1384,7 +1369,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) __lpc_get_mac(pldat, ndev->dev_addr); if (!is_valid_ether_addr(ndev->dev_addr)) { - const char *macaddr = 
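/* MAC selection order in this probe path: the address programmed into
 * the controller is read back first via __lpc_get_mac(); only if that
 * is not a valid unicast address does the driver consult the device
 * tree, where of_get_mac_address() checks the "mac-address" and
 * "local-mac-address" properties of the node.
 */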
of_get_mac_address(pdev->dev.of_node); + const char *macaddr = of_get_mac_address(np); if (macaddr) memcpy(ndev->dev_addr, macaddr, ETH_ALEN); } @@ -1414,7 +1399,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) ret = register_netdev(ndev); if (ret) { - dev_err(&pdev->dev, "Cannot register net device, aborting.\n"); + dev_err(dev, "Cannot register net device, aborting.\n"); goto err_out_dma_unmap; } platform_set_drvdata(pdev, ndev); @@ -1426,19 +1411,17 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) netdev_info(ndev, "LPC mac at 0x%08x irq %d\n", res->start, ndev->irq); - phydev = ndev->phydev; - - device_init_wakeup(&pdev->dev, 1); - device_set_wakeup_enable(&pdev->dev, 0); + device_init_wakeup(dev, 1); + device_set_wakeup_enable(dev, 0); return 0; err_out_unregister_netdev: unregister_netdev(ndev); err_out_dma_unmap: - if (!use_iram_for_net(&pldat->pdev->dev) || + if (!use_iram_for_net(dev) || pldat->dma_buff_size > lpc32xx_return_iram_size()) - dma_free_coherent(&pldat->pdev->dev, pldat->dma_buff_size, + dma_free_coherent(dev, pldat->dma_buff_size, pldat->dma_buff_base_v, pldat->dma_buff_base_p); err_out_free_irq: @@ -1533,13 +1516,11 @@ static int lpc_eth_drv_resume(struct platform_device *pdev) } #endif -#ifdef CONFIG_OF static const struct of_device_id lpc_eth_match[] = { { .compatible = "nxp,lpc-eth" }, { } }; MODULE_DEVICE_TABLE(of, lpc_eth_match); -#endif static struct platform_driver lpc_eth_driver = { .probe = lpc_eth_drv_probe, @@ -1550,7 +1531,7 @@ static struct platform_driver lpc_eth_driver = { #endif .driver = { .name = MODNAME, - .of_match_table = of_match_ptr(lpc_eth_match), + .of_match_table = lpc_eth_match, }, }; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 5f0962d353ce..d9a03aba0e02 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -915,7 +915,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); /* Prototypes */ int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); -void qed_link_update(struct qed_hwfn *hwfn); +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index f2dfc7a976c0..5c221ebaa7b3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12207,11 +12207,56 @@ struct public_port { u32 transceiver_data; #define ETH_TRANSCEIVER_STATE_MASK 0x000000FF #define ETH_TRANSCEIVER_STATE_SHIFT 0x00000000 +#define ETH_TRANSCEIVER_STATE_OFFSET 0x00000000 #define ETH_TRANSCEIVER_STATE_UNPLUGGED 0x00000000 #define ETH_TRANSCEIVER_STATE_PRESENT 0x00000001 #define ETH_TRANSCEIVER_STATE_VALID 0x00000003 #define ETH_TRANSCEIVER_STATE_UPDATING 0x00000008 - +#define ETH_TRANSCEIVER_TYPE_MASK 0x0000FF00 +#define ETH_TRANSCEIVER_TYPE_OFFSET 0x8 +#define ETH_TRANSCEIVER_TYPE_NONE 0x00 +#define ETH_TRANSCEIVER_TYPE_UNKNOWN 0xFF +#define ETH_TRANSCEIVER_TYPE_1G_PCC 0x01 +#define ETH_TRANSCEIVER_TYPE_1G_ACC 0x02 +#define ETH_TRANSCEIVER_TYPE_1G_LX 0x03 +#define ETH_TRANSCEIVER_TYPE_1G_SX 0x04 +#define ETH_TRANSCEIVER_TYPE_10G_SR 0x05 +#define ETH_TRANSCEIVER_TYPE_10G_LR 0x06 +#define ETH_TRANSCEIVER_TYPE_10G_LRM 0x07 +#define ETH_TRANSCEIVER_TYPE_10G_ER 0x08 +#define ETH_TRANSCEIVER_TYPE_10G_PCC 0x09 +#define ETH_TRANSCEIVER_TYPE_10G_ACC 0x0a +#define 
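/* The transceiver type values below are encoded in bits 8-15 of
 * transceiver_data, per ETH_TRANSCEIVER_TYPE_MASK (0x0000FF00) and
 * ETH_TRANSCEIVER_TYPE_OFFSET (0x8) above; a raw register value of
 * 0x00000503, say, decodes to state VALID (0x3) with type 10G_SR
 * (0x05).
 */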
ETH_TRANSCEIVER_TYPE_XLPPI 0x0b +#define ETH_TRANSCEIVER_TYPE_40G_LR4 0x0c +#define ETH_TRANSCEIVER_TYPE_40G_SR4 0x0d +#define ETH_TRANSCEIVER_TYPE_40G_CR4 0x0e +#define ETH_TRANSCEIVER_TYPE_100G_AOC 0x0f +#define ETH_TRANSCEIVER_TYPE_100G_SR4 0x10 +#define ETH_TRANSCEIVER_TYPE_100G_LR4 0x11 +#define ETH_TRANSCEIVER_TYPE_100G_ER4 0x12 +#define ETH_TRANSCEIVER_TYPE_100G_ACC 0x13 +#define ETH_TRANSCEIVER_TYPE_100G_CR4 0x14 +#define ETH_TRANSCEIVER_TYPE_4x10G_SR 0x15 +#define ETH_TRANSCEIVER_TYPE_25G_CA_N 0x16 +#define ETH_TRANSCEIVER_TYPE_25G_ACC_S 0x17 +#define ETH_TRANSCEIVER_TYPE_25G_CA_S 0x18 +#define ETH_TRANSCEIVER_TYPE_25G_ACC_M 0x19 +#define ETH_TRANSCEIVER_TYPE_25G_CA_L 0x1a +#define ETH_TRANSCEIVER_TYPE_25G_ACC_L 0x1b +#define ETH_TRANSCEIVER_TYPE_25G_SR 0x1c +#define ETH_TRANSCEIVER_TYPE_25G_LR 0x1d +#define ETH_TRANSCEIVER_TYPE_25G_AOC 0x1e +#define ETH_TRANSCEIVER_TYPE_4x10G 0x1f +#define ETH_TRANSCEIVER_TYPE_4x25G_CR 0x20 +#define ETH_TRANSCEIVER_TYPE_1000BASET 0x21 +#define ETH_TRANSCEIVER_TYPE_10G_BASET 0x22 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR 0x30 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR 0x31 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR 0x32 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR 0x33 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR 0x34 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR 0x35 +#define ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC 0x36 u32 wol_info; u32 wol_pkt_len; u32 wol_pkt_details; @@ -13199,6 +13244,13 @@ struct nvm_cfg1_port { u32 transceiver_00; u32 device_ids; u32 board_cfg; +#define NVM_CFG1_PORT_PORT_TYPE_MASK 0x000000FF +#define NVM_CFG1_PORT_PORT_TYPE_OFFSET 0 +#define NVM_CFG1_PORT_PORT_TYPE_UNDEFINED 0x0 +#define NVM_CFG1_PORT_PORT_TYPE_MODULE 0x1 +#define NVM_CFG1_PORT_PORT_TYPE_BACKPLANE 0x2 +#define NVM_CFG1_PORT_PORT_TYPE_EXT_PHY 0x3 +#define NVM_CFG1_PORT_PORT_TYPE_MODULE_SLAVE 0x4 u32 mnm_10g_cap; u32 mnm_10g_ctrl; u32 mnm_10g_misc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index af3a28ec04eb..0f0aba793352 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -228,7 +228,7 @@ static int qed_grc_attn_cb(struct qed_hwfn *p_hwfn) attn_master_to_str(GET_FIELD(tmp, QED_GRC_ATTENTION_MASTER)), GET_FIELD(tmp2, QED_GRC_ATTENTION_PF), (GET_FIELD(tmp2, QED_GRC_ATTENTION_PRIV) == - QED_GRC_ATTENTION_PRIV_VF) ? "VF" : "(Ireelevant)", + QED_GRC_ATTENTION_PRIV_VF) ? 
"VF" : "(Irrelevant)", GET_FIELD(tmp2, QED_GRC_ATTENTION_VF)); out: diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 75d217aaf8ce..35fd0db6a677 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -58,6 +58,7 @@ #include "qed_iscsi.h" #include "qed_mcp.h" +#include "qed_reg_addr.h" #include "qed_hw.h" #include "qed_selftest.h" #include "qed_debug.h" @@ -1304,6 +1305,7 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) struct qed_hwfn *hwfn; struct qed_mcp_link_params *link_params; struct qed_ptt *ptt; + u32 sup_caps; int rc; if (!cdev) @@ -1330,26 +1332,50 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) link_params->speed.autoneg = params->autoneg; if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) { link_params->speed.advertised_speeds = 0; - if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) || - (params->adv_speeds & QED_LM_1000baseT_Full_BIT)) + sup_caps = QED_LM_1000baseT_Full_BIT | + QED_LM_1000baseKX_Full_BIT | + QED_LM_1000baseX_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; - if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT) + sup_caps = QED_LM_10000baseT_Full_BIT | + QED_LM_10000baseKR_Full_BIT | + QED_LM_10000baseKX4_Full_BIT | + QED_LM_10000baseR_FEC_BIT | + QED_LM_10000baseCR_Full_BIT | + QED_LM_10000baseSR_Full_BIT | + QED_LM_10000baseLR_Full_BIT | + QED_LM_10000baseLRM_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; if (params->adv_speeds & QED_LM_20000baseKR2_Full_BIT) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G; - if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT) + sup_caps = QED_LM_25000baseKR_Full_BIT | + QED_LM_25000baseCR_Full_BIT | + QED_LM_25000baseSR_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; - if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT) + sup_caps = QED_LM_40000baseLR4_Full_BIT | + QED_LM_40000baseKR4_Full_BIT | + QED_LM_40000baseCR4_Full_BIT | + QED_LM_40000baseSR4_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; - if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + sup_caps = QED_LM_50000baseKR2_Full_BIT | + QED_LM_50000baseCR2_Full_BIT | + QED_LM_50000baseSR2_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; - if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT) + sup_caps = QED_LM_100000baseKR4_Full_BIT | + QED_LM_100000baseSR4_Full_BIT | + QED_LM_100000baseCR4_Full_BIT | + QED_LM_100000baseLR4_ER4_Full_BIT; + if (params->adv_speeds & sup_caps) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G; } @@ -1462,12 +1488,149 @@ static int qed_get_link_data(struct qed_hwfn *hwfn, return 0; } +static void qed_fill_link_capability(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, u32 capability, + u32 *if_capability) +{ + u32 media_type, tcvr_state, tcvr_type; + u32 speed_mask, board_cfg; + + if (qed_mcp_get_media_type(hwfn, ptt, &media_type)) + media_type = MEDIA_UNSPECIFIED; + + if (qed_mcp_get_transceiver_data(hwfn, 
ptt, &tcvr_state, &tcvr_type)) + tcvr_type = ETH_TRANSCEIVER_STATE_UNPLUGGED; + + if (qed_mcp_trans_speed_mask(hwfn, ptt, &speed_mask)) + speed_mask = 0xFFFFFFFF; + + if (qed_mcp_get_board_config(hwfn, ptt, &board_cfg)) + board_cfg = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED; + + DP_VERBOSE(hwfn->cdev, NETIF_MSG_DRV, + "Media_type = 0x%x tcvr_state = 0x%x tcvr_type = 0x%x speed_mask = 0x%x board_cfg = 0x%x\n", + media_type, tcvr_state, tcvr_type, speed_mask, board_cfg); + + switch (media_type) { + case MEDIA_DA_TWINAX: + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) + *if_capability |= QED_LM_20000baseKR2_Full_BIT; + /* For DAC media multiple speed capabilities are supported*/ + capability = capability & speed_mask; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) + *if_capability |= QED_LM_1000baseKX_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) + *if_capability |= QED_LM_10000baseCR_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + *if_capability |= QED_LM_40000baseCR4_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + *if_capability |= QED_LM_25000baseCR_Full_BIT; + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + *if_capability |= QED_LM_50000baseCR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) + *if_capability |= QED_LM_100000baseCR4_Full_BIT; + break; + case MEDIA_BASE_T: + if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_EXT_PHY) { + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) { + *if_capability |= QED_LM_1000baseT_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) { + *if_capability |= QED_LM_10000baseT_Full_BIT; + } + } + if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_MODULE) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_1000BASET) + *if_capability |= QED_LM_1000baseT_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_BASET) + *if_capability |= QED_LM_10000baseT_Full_BIT; + } + break; + case MEDIA_SFP_1G_FIBER: + case MEDIA_SFPP_10G_FIBER: + case MEDIA_XFP_FIBER: + case MEDIA_MODULE_FIBER: + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) { + if ((tcvr_type == ETH_TRANSCEIVER_TYPE_1G_LX) || + (tcvr_type == ETH_TRANSCEIVER_TYPE_1G_SX)) + *if_capability |= QED_LM_1000baseKX_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_SR) + *if_capability |= QED_LM_10000baseSR_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LR) + *if_capability |= QED_LM_10000baseLR_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LRM) + *if_capability |= QED_LM_10000baseLRM_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_ER) + *if_capability |= QED_LM_10000baseR_FEC_BIT; + } + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) + *if_capability |= QED_LM_20000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_25G_SR) + *if_capability |= QED_LM_25000baseSR_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) { + if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_LR4) + *if_capability |= QED_LM_40000baseLR4_Full_BIT; + if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_SR4) + *if_capability |= QED_LM_40000baseSR4_Full_BIT; + } + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + *if_capability |= QED_LM_50000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) { + if (tcvr_type == 
ETH_TRANSCEIVER_TYPE_100G_SR4) + *if_capability |= QED_LM_100000baseSR4_Full_BIT; + } + + break; + case MEDIA_KR: + if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) + *if_capability |= QED_LM_20000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) + *if_capability |= QED_LM_1000baseKX_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) + *if_capability |= QED_LM_10000baseKR_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + *if_capability |= QED_LM_25000baseKR_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + *if_capability |= QED_LM_40000baseKR4_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + *if_capability |= QED_LM_50000baseKR2_Full_BIT; + if (capability & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) + *if_capability |= QED_LM_100000baseKR4_Full_BIT; + break; + case MEDIA_UNSPECIFIED: + case MEDIA_NOT_PRESENT: + DP_VERBOSE(hwfn->cdev, QED_MSG_DEBUG, + "Unknown media and transceiver type;\n"); + break; + } +} + static void qed_fill_link(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, struct qed_link_output *if_link) { + struct qed_mcp_link_capabilities link_caps; struct qed_mcp_link_params params; struct qed_mcp_link_state link; - struct qed_mcp_link_capabilities link_caps; u32 media_type; memset(if_link, 0, sizeof(*if_link)); @@ -1498,58 +1661,20 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->advertised_caps |= QED_LM_Autoneg_BIT; else if_link->advertised_caps &= ~QED_LM_Autoneg_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT | - QED_LM_1000baseT_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) - if_link->advertised_caps |= QED_LM_20000baseKR2_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) - if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT; - if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT; - - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->supported_caps |= QED_LM_1000baseT_Half_BIT | - QED_LM_1000baseT_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G) - if_link->supported_caps |= QED_LM_20000baseKR2_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) - if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT; - if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT; - if (link_caps.speed_capabilities & 
- NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT; + + /* Fill link advertised capability*/ + qed_fill_link_capability(hwfn, ptt, params.speed.advertised_speeds, + &if_link->advertised_caps); + /* Fill link supported capability*/ + qed_fill_link_capability(hwfn, ptt, link_caps.speed_capabilities, + &if_link->supported_caps); if (link.link_up) if_link->speed = link.speed; /* TODO - fill duplex properly */ if_link->duplex = DUPLEX_FULL; - qed_mcp_get_media_type(hwfn->cdev, &media_type); + qed_mcp_get_media_type(hwfn, ptt, &media_type); if_link->port = qed_get_port_type(media_type); if_link->autoneg = params.speed.autoneg; @@ -1562,9 +1687,8 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE; /* Link partner capabilities */ - if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD) - if_link->lp_caps |= QED_LM_1000baseT_Half_BIT; - if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD) + if (link.partner_adv_speed & + QED_LINK_PARTNER_SPEED_1G_FD) if_link->lp_caps |= QED_LM_1000baseT_Full_BIT; if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G) if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT; @@ -1607,21 +1731,34 @@ static void qed_fill_link(struct qed_hwfn *hwfn, static void qed_get_current_link(struct qed_dev *cdev, struct qed_link_output *if_link) { + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; int i; - qed_fill_link(&cdev->hwfns[0], if_link); + hwfn = &cdev->hwfns[0]; + if (IS_PF(cdev)) { + ptt = qed_ptt_acquire(hwfn); + if (ptt) { + qed_fill_link(hwfn, ptt, if_link); + qed_ptt_release(hwfn, ptt); + } else { + DP_NOTICE(hwfn, "Failed to fill link; No PTT\n"); + } + } else { + qed_fill_link(hwfn, NULL, if_link); + } for_each_hwfn(cdev, i) qed_inform_vf_link_state(&cdev->hwfns[i]); } -void qed_link_update(struct qed_hwfn *hwfn) +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt) { void *cookie = hwfn->cdev->ops_cookie; struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common; struct qed_link_output if_link; - qed_fill_link(hwfn, &if_link); + qed_fill_link(hwfn, ptt, &if_link); qed_inform_vf_link_state(hwfn); if (IS_LEAD_HWFN(hwfn) && cookie) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index b06e4cb72c6c..386ee5410237 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1447,7 +1447,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link); - qed_link_update(p_hwfn); + qed_link_update(p_hwfn, p_ptt); out: spin_unlock_bh(&p_hwfn->mcp_info->link_lock); } @@ -1867,12 +1867,12 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_media_type) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; - struct qed_ptt *p_ptt; + *p_media_type = MEDIA_UNSPECIFIED; - if (IS_VF(cdev)) + if (IS_VF(p_hwfn->cdev)) return -EINVAL; if (!qed_mcp_is_init(p_hwfn)) { @@ -1880,16 +1880,195 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) return -EBUSY; } - *p_media_type = MEDIA_UNSPECIFIED; + if (!p_ptt) { + *p_media_type = MEDIA_UNSPECIFIED; + return -EINVAL; + } - p_ptt = qed_ptt_acquire(p_hwfn); - if (!p_ptt) + *p_media_type = qed_rd(p_hwfn, p_ptt, + 
p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + media_type)); + + return 0; +} + +int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_transceiver_state, + u32 *p_transceiver_type) +{ + u32 transceiver_info; + + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + + if (!qed_mcp_is_init(p_hwfn)) { + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); return -EBUSY; + } - *p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, media_type)); + *p_transceiver_type = ETH_TRANSCEIVER_TYPE_NONE; + *p_transceiver_state = ETH_TRANSCEIVER_STATE_UPDATING; - qed_ptt_release(p_hwfn, p_ptt); + transceiver_info = qed_rd(p_hwfn, p_ptt, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + transceiver_data)); + + *p_transceiver_state = (transceiver_info & + ETH_TRANSCEIVER_STATE_MASK) >> + ETH_TRANSCEIVER_STATE_OFFSET; + + if (*p_transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT) + *p_transceiver_type = (transceiver_info & + ETH_TRANSCEIVER_TYPE_MASK) >> + ETH_TRANSCEIVER_TYPE_OFFSET; + else + *p_transceiver_type = ETH_TRANSCEIVER_TYPE_UNKNOWN; + + return 0; +} +static bool qed_is_transceiver_ready(u32 transceiver_state, + u32 transceiver_type) +{ + if ((transceiver_state & ETH_TRANSCEIVER_STATE_PRESENT) && + ((transceiver_state & ETH_TRANSCEIVER_STATE_UPDATING) == 0x0) && + (transceiver_type != ETH_TRANSCEIVER_TYPE_NONE)) + return true; + + return false; +} + +int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_speed_mask) +{ + u32 transceiver_type, transceiver_state; + + qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state, + &transceiver_type); + + if (qed_is_transceiver_ready(transceiver_state, transceiver_type) == + false) + return -EINVAL; + + switch (transceiver_type) { + case ETH_TRANSCEIVER_TYPE_1G_LX: + case ETH_TRANSCEIVER_TYPE_1G_SX: + case ETH_TRANSCEIVER_TYPE_1G_PCC: + case ETH_TRANSCEIVER_TYPE_1G_ACC: + case ETH_TRANSCEIVER_TYPE_1000BASET: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_10G_SR: + case ETH_TRANSCEIVER_TYPE_10G_LR: + case ETH_TRANSCEIVER_TYPE_10G_LRM: + case ETH_TRANSCEIVER_TYPE_10G_ER: + case ETH_TRANSCEIVER_TYPE_10G_PCC: + case ETH_TRANSCEIVER_TYPE_10G_ACC: + case ETH_TRANSCEIVER_TYPE_4x10G: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + break; + case ETH_TRANSCEIVER_TYPE_40G_LR4: + case ETH_TRANSCEIVER_TYPE_40G_SR4: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + break; + case ETH_TRANSCEIVER_TYPE_100G_AOC: + case ETH_TRANSCEIVER_TYPE_100G_SR4: + case ETH_TRANSCEIVER_TYPE_100G_LR4: + case ETH_TRANSCEIVER_TYPE_100G_ER4: + case ETH_TRANSCEIVER_TYPE_100G_ACC: + *p_speed_mask = + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; + break; + case ETH_TRANSCEIVER_TYPE_25G_SR: + case ETH_TRANSCEIVER_TYPE_25G_LR: + case ETH_TRANSCEIVER_TYPE_25G_AOC: + case ETH_TRANSCEIVER_TYPE_25G_ACC_S: + case ETH_TRANSCEIVER_TYPE_25G_ACC_M: + case ETH_TRANSCEIVER_TYPE_25G_ACC_L: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; + break; + case ETH_TRANSCEIVER_TYPE_25G_CA_N: + case ETH_TRANSCEIVER_TYPE_25G_CA_S: + case ETH_TRANSCEIVER_TYPE_25G_CA_L: + case ETH_TRANSCEIVER_TYPE_4x25G_CR: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G | + 
NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_40G_CR4: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_100G_CR4: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR: + *p_speed_mask = + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR: + case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC: + *p_speed_mask = + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + break; + case ETH_TRANSCEIVER_TYPE_XLPPI: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + break; + case ETH_TRANSCEIVER_TYPE_10G_BASET: + *p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G | + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + break; + default: + DP_INFO(p_hwfn, "Unknown transceiver type 0x%x\n", + transceiver_type); + *p_speed_mask = 0xff; + break; + } + + return 0; +} + +int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_board_config) +{ + u32 nvm_cfg_addr, nvm_cfg1_offset, port_cfg_addr; + + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + + if (!qed_mcp_is_init(p_hwfn)) { + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); + return -EBUSY; + } + if (!p_ptt) { + *p_board_config = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED; + return -EINVAL; + } + + nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0); + nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4); + port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset + + offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]); + *p_board_config = qed_rd(p_hwfn, p_ptt, + port_cfg_addr + + offsetof(struct nvm_cfg1_port, + board_cfg)); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 85e6b3989e7a..1adfe52b3905 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -322,14 +322,61 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, * @brief Get media type value of the port. * * @param cdev - qed dev pointer + * @param p_ptt * @param mfw_ver - media type value * * @return int - * 0 - Operation was successul. * -EBUSY - Operation failed */ -int qed_mcp_get_media_type(struct qed_dev *cdev, - u32 *media_type); +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *media_type); + +/** + * @brief Get transceiver data of the port. + * + * @param cdev - qed dev pointer + * @param p_ptt + * @param p_transceiver_state - transceiver state. + * @param p_transceiver_type - media type value + * + * @return int - + * 0 - Operation was successful. + * -EBUSY - Operation failed + */ +int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_transceiver_state, + u32 *p_transceiver_type); + +/** + * @brief Get transceiver supported speed mask. 
+ * + * @param cdev - qed dev pointer + * @param p_ptt + * @param p_speed_mask - Bit mask of all supported speeds. + * + * @return int - + * 0 - Operation was successful. + * -EBUSY - Operation failed + */ + +int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_speed_mask); + +/** + * @brief Get board configuration. + * + * @param cdev - qed dev pointer + * @param p_ptt + * @param p_board_config - Board config. + * + * @return int - + * 0 - Operation was successful. + * -EBUSY - Operation failed + */ +int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_board_config); /** * @brief General function for sending commands to the MCP diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index be118d057b92..b6cccf44bf40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1688,7 +1688,7 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn) ops->ports_update(cookie, vxlan_port, geneve_port); /* Always update link configuration according to bulletin */ - qed_link_update(hwfn); + qed_link_update(hwfn, NULL); } void qed_iov_vf_task(struct work_struct *work) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 7ff50b4488f6..8cbbd628fd73 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -413,19 +413,42 @@ struct qede_link_mode_mapping { }; static const struct qede_link_mode_mapping qed_lm_map[] = { - {QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT}, {QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT}, {QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT}, {QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT}, - {QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT}, {QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT}, + {QED_LM_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT}, + {QED_LM_2500baseX_Full_BIT, ETHTOOL_LINK_MODE_2500baseX_Full_BIT}, + {QED_LM_Backplane_BIT, ETHTOOL_LINK_MODE_Backplane_BIT}, + {QED_LM_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT}, + {QED_LM_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT}, {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT}, + {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT}, + {QED_LM_10000baseR_FEC_BIT, ETHTOOL_LINK_MODE_10000baseR_FEC_BIT}, {QED_LM_20000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT}, - {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT}, + {QED_LM_40000baseKR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT}, + {QED_LM_40000baseCR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT}, + {QED_LM_40000baseSR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT}, {QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT}, + {QED_LM_25000baseCR_Full_BIT, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT}, + {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT}, + {QED_LM_25000baseSR_Full_BIT, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT}, + {QED_LM_50000baseCR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT}, {QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT}, {QED_LM_100000baseKR4_Full_BIT, - ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT}, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT}, + {QED_LM_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT}, + 
{QED_LM_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT}, + {QED_LM_100000baseLR4_ER4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT}, + {QED_LM_50000baseSR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT}, + {QED_LM_1000baseX_Full_BIT, ETHTOOL_LINK_MODE_1000baseX_Full_BIT}, + {QED_LM_10000baseCR_Full_BIT, ETHTOOL_LINK_MODE_10000baseCR_Full_BIT}, + {QED_LM_10000baseSR_Full_BIT, ETHTOOL_LINK_MODE_10000baseSR_Full_BIT}, + {QED_LM_10000baseLR_Full_BIT, ETHTOOL_LINK_MODE_10000baseLR_Full_BIT}, + {QED_LM_10000baseLRM_Full_BIT, ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT}, }; #define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name) \ @@ -495,6 +518,7 @@ static int qede_set_link_ksettings(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; struct qed_link_params params; + u32 sup_caps; if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { DP_INFO(edev, "Link settings are not allowed to be changed\n"); @@ -521,60 +545,85 @@ static int qede_set_link_ksettings(struct net_device *dev, params.forced_speed = base->speed; switch (base->speed) { case SPEED_1000: - if (!(current_link.supported_caps & - QED_LM_1000baseT_Full_BIT)) { + sup_caps = QED_LM_1000baseT_Full_BIT | + QED_LM_1000baseKX_Full_BIT | + QED_LM_1000baseX_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "1G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_1000baseT_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_10000: - if (!(current_link.supported_caps & - QED_LM_10000baseKR_Full_BIT)) { + sup_caps = QED_LM_10000baseT_Full_BIT | + QED_LM_10000baseKR_Full_BIT | + QED_LM_10000baseKX4_Full_BIT | + QED_LM_10000baseR_FEC_BIT | + QED_LM_10000baseCR_Full_BIT | + QED_LM_10000baseSR_Full_BIT | + QED_LM_10000baseLR_Full_BIT | + QED_LM_10000baseLRM_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "10G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_10000baseKR_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_20000: if (!(current_link.supported_caps & - QED_LM_20000baseKR2_Full_BIT)) { + QED_LM_20000baseKR2_Full_BIT)) { DP_INFO(edev, "20G speed not supported\n"); return -EINVAL; } params.adv_speeds = QED_LM_20000baseKR2_Full_BIT; break; case SPEED_25000: - if (!(current_link.supported_caps & - QED_LM_25000baseKR_Full_BIT)) { + sup_caps = QED_LM_25000baseKR_Full_BIT | + QED_LM_25000baseCR_Full_BIT | + QED_LM_25000baseSR_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "25G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_25000baseKR_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_40000: - if (!(current_link.supported_caps & - QED_LM_40000baseLR4_Full_BIT)) { + sup_caps = QED_LM_40000baseLR4_Full_BIT | + QED_LM_40000baseKR4_Full_BIT | + QED_LM_40000baseCR4_Full_BIT | + QED_LM_40000baseSR4_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "40G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_40000baseLR4_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_50000: - if (!(current_link.supported_caps & - QED_LM_50000baseKR2_Full_BIT)) { + sup_caps = QED_LM_50000baseKR2_Full_BIT | + QED_LM_50000baseCR2_Full_BIT | + QED_LM_50000baseSR2_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { 
DP_INFO(edev, "50G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_50000baseKR2_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; case SPEED_100000: - if (!(current_link.supported_caps & - QED_LM_100000baseKR4_Full_BIT)) { + sup_caps = QED_LM_100000baseKR4_Full_BIT | + QED_LM_100000baseSR4_Full_BIT | + QED_LM_100000baseCR4_Full_BIT | + QED_LM_100000baseLR4_ER4_Full_BIT; + if (!(current_link.supported_caps & sup_caps)) { DP_INFO(edev, "100G speed not supported\n"); return -EINVAL; } - params.adv_speeds = QED_LM_100000baseKR4_Full_BIT; + params.adv_speeds = current_link.supported_caps & + sup_caps; break; default: DP_INFO(edev, "Unsupported speed %u\n", base->speed); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index b48f76182049..10b075bc5959 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -380,8 +380,6 @@ static void fm93c56a_select(struct ql3_adapter *qdev) qdev->eeprom_cmd_data = AUBURN_EEPROM_CS_1; ql_write_nvram_reg(qdev, spir, ISP_NVRAM_MASK | qdev->eeprom_cmd_data); - ql_write_nvram_reg(qdev, spir, - ((ISP_NVRAM_MASK << 16) | qdev->eeprom_cmd_data)); } /* diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 8c4f49adcf9e..006b0aa8cec3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -631,7 +631,6 @@ struct rtl8169_tc_offsets { enum rtl_flag { RTL_FLAG_TASK_ENABLED = 0, - RTL_FLAG_TASK_SLOW_PENDING, RTL_FLAG_TASK_RESET_PENDING, RTL_FLAG_MAX }; @@ -5852,6 +5851,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp) { rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC); tp->cur_tx = tp->dirty_tx = 0; + netdev_reset_queue(tp->dev); } static void rtl_reset_work(struct rtl8169_private *tp) @@ -6154,6 +6154,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); + netdev_sent_queue(dev, skb->len); + skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ @@ -6252,7 +6254,7 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) { - unsigned int dirty_tx, tx_left; + unsigned int dirty_tx, tx_left, bytes_compl = 0, pkts_compl = 0; dirty_tx = tp->dirty_tx; smp_rmb(); @@ -6276,10 +6278,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb, tp->TxDescArray + entry); if (status & LastFrag) { - u64_stats_update_begin(&tp->tx_stats.syncp); - tp->tx_stats.packets++; - tp->tx_stats.bytes += tx_skb->skb->len; - u64_stats_update_end(&tp->tx_stats.syncp); + pkts_compl++; + bytes_compl += tx_skb->skb->len; dev_consume_skb_any(tx_skb->skb); tx_skb->skb = NULL; } @@ -6288,6 +6288,13 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) } if (tp->dirty_tx != dirty_tx) { + netdev_completed_queue(dev, pkts_compl, bytes_compl); + + u64_stats_update_begin(&tp->tx_stats.syncp); + tp->tx_stats.packets += pkts_compl; + tp->tx_stats.bytes += bytes_compl; + u64_stats_update_end(&tp->tx_stats.syncp); + tp->dirty_tx = dirty_tx; /* Sync with rtl8169_start_xmit: * - publish dirty_tx ring index (write barrier) @@ -6452,42 +6459,29 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) if (status == 0xffff || !(status & (RTL_EVENT_NAPI | tp->event_slow))) return IRQ_NONE; - rtl_irq_disable(tp); - napi_schedule_irqoff(&tp->napi); 
- - return IRQ_HANDLED; -} - -/* - * Workqueue context. - */ -static void rtl_slow_event_work(struct rtl8169_private *tp) -{ - struct net_device *dev = tp->dev; - u16 status; + if (unlikely(status & SYSErr)) { + rtl8169_pcierr_interrupt(tp->dev); + goto out; + } - status = rtl_get_events(tp) & tp->event_slow; - rtl_ack_events(tp, status); + if (status & LinkChg) + phy_mac_interrupt(tp->dev->phydev); - if (unlikely(status & RxFIFOOver)) { - switch (tp->mac_version) { - /* Work around for rx fifo overflow */ - case RTL_GIGA_MAC_VER_11: - netif_stop_queue(dev); - /* XXX - Hack alert. See rtl_task(). */ - set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); - default: - break; - } + if (unlikely(status & RxFIFOOver && + tp->mac_version == RTL_GIGA_MAC_VER_11)) { + netif_stop_queue(tp->dev); + /* XXX - Hack alert. See rtl_task(). */ + set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); } - if (unlikely(status & SYSErr)) - rtl8169_pcierr_interrupt(dev); - - if (status & LinkChg) - phy_mac_interrupt(dev->phydev); + if (status & RTL_EVENT_NAPI) { + rtl_irq_disable(tp); + napi_schedule_irqoff(&tp->napi); + } +out: + rtl_ack_events(tp, status); - rtl_irq_enable_all(tp); + return IRQ_HANDLED; } static void rtl_task(struct work_struct *work) @@ -6496,8 +6490,6 @@ static void rtl_task(struct work_struct *work) int bitnr; void (*action)(struct rtl8169_private *); } rtl_work[] = { - /* XXX - keep rtl_slow_event_work() as first element. */ - { RTL_FLAG_TASK_SLOW_PENDING, rtl_slow_event_work }, { RTL_FLAG_TASK_RESET_PENDING, rtl_reset_work }, }; struct rtl8169_private *tp = @@ -6527,29 +6519,16 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) { struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; - u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; - u16 status; - - status = rtl_get_events(tp); - rtl_ack_events(tp, status & ~tp->event_slow); + int work_done; - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); - - if (status & tp->event_slow) { - enable_mask &= ~tp->event_slow; - - rtl_schedule_task(tp, RTL_FLAG_TASK_SLOW_PENDING); - } + rtl_tx(dev, tp); if (work_done < budget) { napi_complete_done(napi, work_done); - rtl_irq_enable(tp, enable_mask); + rtl_irq_enable_all(tp); mmiowb(); } @@ -7071,20 +7050,12 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) { unsigned int flags; - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { RTL_W8(tp, Cfg9346, Cfg9346_Unlock); RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); RTL_W8(tp, Cfg9346, Cfg9346_Lock); flags = PCI_IRQ_LEGACY; - break; - case RTL_GIGA_MAC_VER_39 ... 
RTL_GIGA_MAC_VER_40: - /* This version was reported to have issues with resume - * from suspend when using MSI-X - */ - flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI; - break; - default: + } else { flags = PCI_IRQ_ALL_TYPES; } diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index aeafdb9ac015..beb06628f22d 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -371,7 +371,7 @@ static void rocker_desc_cookie_ptr_set(const struct rocker_desc_info *desc_info, static struct rocker_desc_info * rocker_desc_head_get(const struct rocker_dma_ring_info *info) { - static struct rocker_desc_info *desc_info; + struct rocker_desc_info *desc_info; u32 head = __pos_inc(info->head, info->size); desc_info = &info->desc_info[info->head]; @@ -402,7 +402,7 @@ static void rocker_desc_head_set(const struct rocker *rocker, static struct rocker_desc_info * rocker_desc_tail_get(struct rocker_dma_ring_info *info) { - static struct rocker_desc_info *desc_info; + struct rocker_desc_info *desc_info; if (info->tail == info->head) return NULL; /* nothing to be done between head and tail */ @@ -2728,6 +2728,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port, info.addr = recv_info->addr; info.vid = recv_info->vid; + info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, rocker_port->dev, &info.info); } diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 8d6cff8bd162..4823b6a51134 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2447,8 +2447,7 @@ static int smc_drv_remove(struct platform_device *pdev) static int smc_drv_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); if (ndev) { if (netif_running(ndev)) { diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 226be2a56c1f..6cf4d4cb152e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -3669,8 +3669,7 @@ static int cpsw_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int cpsw_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct cpsw_common *cpsw = ndev_to_cpsw(ndev); if (cpsw->data.dual_emac) { @@ -3693,8 +3692,7 @@ static int cpsw_suspend(struct device *dev) static int cpsw_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Select default pin state */ diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index f270beebb428..9153db120352 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -2002,8 +2002,7 @@ static int davinci_emac_remove(struct platform_device *pdev) static int davinci_emac_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); if (netif_running(ndev)) emac_dev_stop(ndev); @@ -2013,8 +2012,7 @@ static int davinci_emac_suspend(struct device *dev) static int 
davinci_emac_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); if (netif_running(ndev)) emac_dev_open(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 80fdbff67d82..f9da5d6172e3 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -661,8 +661,7 @@ static int w5300_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int w5300_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5300_priv *priv = netdev_priv(ndev); if (netif_running(ndev)) { @@ -676,8 +675,7 @@ static int w5300_suspend(struct device *dev) static int w5300_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *ndev = platform_get_drvdata(pdev); + struct net_device *ndev = dev_get_drvdata(dev); struct w5300_priv *priv = netdev_priv(ndev); if (!netif_running(ndev)) { diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 82eccc930c5c..a0cd1c41cf5f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -831,12 +831,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); - if (skb_dst(skb)) { - int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN - - info->options_len; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, &rt->dst, + GENEVE_IPV4_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { @@ -881,11 +877,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); - if (skb_dst(skb)) { - int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len); sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a7207fa7e451..2df7f60fe052 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -69,6 +69,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb, int len; skb_tx_timestamp(skb); + + /* do not fool net_timestamp_check() with various clock bases */ + skb->tstamp = 0; + skb_orphan(skb); /* Before queueing this packet to netif_rx(), diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cfda146f3b3b..fc8d5f1ee1ad 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1077,7 +1077,7 @@ static void macvlan_dev_netpoll_cleanup(struct net_device *dev) vlan->netpoll = NULL; - __netpoll_free_async(netpoll); + __netpoll_free(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index c017486e9b86..696bdf1e4576 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -289,8 +289,7 @@ static int mdio_mux_iproc_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int mdio_mux_iproc_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); + struct iproc_mdiomux_desc *md = dev_get_drvdata(dev); 
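/*
 * The suspend/resume conversions above (smc91x, cpsw, davinci_emac, w5300,
 * mdio-mux-bcm-iproc) are all one refactor: platform_set_drvdata() stores
 * its pointer in dev->driver_data, so a dev_pm_ops callback that already
 * receives the struct device can call dev_get_drvdata() directly instead
 * of bouncing through to_platform_device(). A hedged sketch with a
 * hypothetical foo_suspend(), not taken from any of these drivers:
 */
#include <linux/device.h>
#include <linux/netdevice.h>

static int foo_suspend(struct device *dev)
{
	/* old form, now redundant:
	 *   struct platform_device *pdev = to_platform_device(dev);
	 *   struct net_device *ndev = platform_get_drvdata(pdev);
	 */
	struct net_device *ndev = dev_get_drvdata(dev);	/* equivalent */

	if (ndev && netif_running(ndev))
		netif_device_detach(ndev);

	return 0;
}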
clk_disable_unprepare(md->core_clk); @@ -299,8 +298,7 @@ static int mdio_mux_iproc_suspend(struct device *dev) static int mdio_mux_iproc_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); + struct iproc_mdiomux_desc *md = dev_get_drvdata(dev); clk_prepare_enable(md->core_clk); mdio_mux_iproc_config(md); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 3db06b40580d..9265dea79412 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -14,7 +14,7 @@ * option) any later version. * * Support : Micrel Phys: - * Giga phys: ksz9021, ksz9031 + * Giga phys: ksz9021, ksz9031, ksz9131 * 100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041 * ksz8021, ksz8031, ksz8051, * ksz8081, ksz8091, @@ -609,6 +609,116 @@ err_force_master: return result; } +#define KSZ9131_SKEW_5BIT_MAX 2400 +#define KSZ9131_SKEW_4BIT_MAX 800 +#define KSZ9131_OFFSET 700 +#define KSZ9131_STEP 100 + +static int ksz9131_of_load_skew_values(struct phy_device *phydev, + struct device_node *of_node, + u16 reg, size_t field_sz, + char *field[], u8 numfields) +{ + int val[4] = {-(1 + KSZ9131_OFFSET), -(2 + KSZ9131_OFFSET), + -(3 + KSZ9131_OFFSET), -(4 + KSZ9131_OFFSET)}; + int skewval, skewmax = 0; + int matches = 0; + u16 maxval; + u16 newval; + u16 mask; + int i; + + /* psec properties in dts should mean x pico seconds */ + if (field_sz == 5) + skewmax = KSZ9131_SKEW_5BIT_MAX; + else + skewmax = KSZ9131_SKEW_4BIT_MAX; + + for (i = 0; i < numfields; i++) + if (!of_property_read_s32(of_node, field[i], &skewval)) { + if (skewval < -KSZ9131_OFFSET) + skewval = -KSZ9131_OFFSET; + else if (skewval > skewmax) + skewval = skewmax; + + val[i] = skewval + KSZ9131_OFFSET; + matches++; + } + + if (!matches) + return 0; + + if (matches < numfields) + newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg); + else + newval = 0; + + maxval = (field_sz == 4) ? 
0xf : 0x1f; + for (i = 0; i < numfields; i++) + if (val[i] != -(i + 1 + KSZ9131_OFFSET)) { + mask = 0xffff; + mask ^= maxval << (field_sz * i); + newval = (newval & mask) | + (((val[i] / KSZ9131_STEP) & maxval) + << (field_sz * i)); + } + + return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval); +} + +static int ksz9131_config_init(struct phy_device *phydev) +{ + const struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + char *clk_skews[2] = {"rxc-skew-psec", "txc-skew-psec"}; + char *rx_data_skews[4] = { + "rxd0-skew-psec", "rxd1-skew-psec", + "rxd2-skew-psec", "rxd3-skew-psec" + }; + char *tx_data_skews[4] = { + "txd0-skew-psec", "txd1-skew-psec", + "txd2-skew-psec", "txd3-skew-psec" + }; + char *control_skews[2] = {"txen-skew-psec", "rxdv-skew-psec"}; + const struct device *dev_walker; + int ret; + + dev_walker = &phydev->mdio.dev; + do { + of_node = dev_walker->of_node; + dev_walker = dev_walker->parent; + } while (!of_node && dev_walker); + + if (!of_node) + return 0; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_CLK_PAD_SKEW, 5, + clk_skews, 2); + if (ret < 0) + return ret; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_CONTROL_PAD_SKEW, 4, + control_skews, 2); + if (ret < 0) + return ret; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_RX_DATA_PAD_SKEW, 4, + rx_data_skews, 4); + if (ret < 0) + return ret; + + ret = ksz9131_of_load_skew_values(phydev, of_node, + MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4, + tx_data_skews, 4); + if (ret < 0) + return ret; + + return 0; +} + #define KSZ8873MLL_GLOBAL_CONTROL_4 0x06 #define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX BIT(6) #define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED BIT(4) @@ -975,6 +1085,23 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = kszphy_resume, }, { + .phy_id = PHY_ID_KSZ9131, + .phy_id_mask = MICREL_PHY_ID_MASK, + .name = "Microchip KSZ9131 Gigabit PHY", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .driver_data = &ksz9021_type, + .probe = kszphy_probe, + .config_init = ksz9131_config_init, + .read_status = ksz9031_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .get_sset_count = kszphy_get_sset_count, + .get_strings = kszphy_get_strings, + .get_stats = kszphy_get_stats, + .suspend = genphy_suspend, + .resume = kszphy_resume, +}, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8873MLL Switch", @@ -1022,6 +1149,7 @@ MODULE_LICENSE("GPL"); static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, { PHY_ID_KSZ9031, MICREL_PHY_ID_MASK }, + { PHY_ID_KSZ9131, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8001, 0x00fffffc }, { PHY_ID_KS8737, MICREL_PHY_ID_MASK }, { PHY_ID_KSZ8021, 0x00ffffff }, diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index bffe077dc75f..a2e59f4f6f01 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -522,7 +522,7 @@ static int vsc85xx_mdix_set(struct phy_device *phydev, u8 mdix) static int vsc85xx_downshift_get(struct phy_device *phydev, u8 *count) { - u16 reg_val; + int reg_val; reg_val = phy_read_paged(phydev, MSCC_PHY_PAGE_EXTENDED, MSCC_PHY_ACTIPHY_CNTL); @@ -1292,7 +1292,7 @@ static int vsc8574_config_pre_init(struct phy_device *phydev) dev_err(dev, "%s: failed to assert reset of micro\n", __func__); - return ret; + goto out; } } } else { diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 
d887016e54b6..db633ae9f784 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1104,10 +1104,7 @@ static void team_port_disable_netpoll(struct team_port *port) return; port->np = NULL; - /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); - __netpoll_cleanup(np); - kfree(np); + __netpoll_free(np); } #else static int team_port_enable_netpoll(struct team_port *port) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3f5aa59c37b7..3e2c041d76ac 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2267,8 +2267,9 @@ static void virtnet_freeze_down(struct virtio_device *vdev) /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); + netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); - netif_tx_disable(vi->dev); + netif_tx_unlock_bh(vi->dev); cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { @@ -2304,7 +2305,9 @@ static int virtnet_restore_up(struct virtio_device *vdev) } } + netif_tx_lock_bh(vi->dev); netif_device_attach(vi->dev); + netif_tx_unlock_bh(vi->dev); return err; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 018406c4d944..297cdeaef479 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -103,22 +103,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; } -static inline bool vxlan_addr_any(const union vxlan_addr *ipa) -{ - if (ipa->sa.sa_family == AF_INET6) - return ipv6_addr_any(&ipa->sin6.sin6_addr); - else - return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); -} - -static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) -{ - if (ipa->sa.sa_family == AF_INET6) - return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); - else - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); -} - static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) { if (nla_len(nla) >= sizeof(struct in6_addr)) { @@ -151,16 +135,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr; } -static inline bool vxlan_addr_any(const union vxlan_addr *ipa) -{ - return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); -} - -static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) -{ - return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); -} - static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla) { if (nla_len(nla) >= sizeof(struct in6_addr)) { @@ -298,6 +272,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_state = fdb->state; ndm->ndm_ifindex = vxlan->dev->ifindex; ndm->ndm_flags = fdb->flags; + if (rdst->offloaded) + ndm->ndm_flags |= NTF_OFFLOADED; ndm->ndm_type = RTN_UNICAST; if (!net_eq(dev_net(vxlan->dev), vxlan->net) && @@ -353,8 +329,8 @@ static inline size_t vxlan_nlmsg_size(void) + nla_total_size(sizeof(struct nda_cacheinfo)); } -static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, - struct vxlan_rdst *rd, int type) +static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, int type) { struct net *net = dev_net(vxlan->dev); struct sk_buff *skb; @@ -379,6 +355,49 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } +static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan, + struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, + bool adding) +{ + struct switchdev_notifier_vxlan_fdb_info info; + enum 
switchdev_notifier_type notifier_type; + + if (WARN_ON(!rd)) + return; + + notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE + : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE; + + info = (struct switchdev_notifier_vxlan_fdb_info){ + .remote_ip = rd->remote_ip, + .remote_port = rd->remote_port, + .remote_vni = rd->remote_vni, + .remote_ifindex = rd->remote_ifindex, + .vni = fdb->vni, + .offloaded = rd->offloaded, + }; + memcpy(info.eth_addr, fdb->eth_addr, ETH_ALEN); + + call_switchdev_notifiers(notifier_type, vxlan->dev, + &info.info); +} + +static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, + struct vxlan_rdst *rd, int type) +{ + switch (type) { + case RTM_NEWNEIGH: + vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true); + break; + case RTM_DELNEIGH: + vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, false); + break; + } + + __vxlan_fdb_notify(vxlan, fdb, rd, type); +} + static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -488,6 +507,47 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, return NULL; } +int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + u8 eth_addr[ETH_ALEN + 2] = { 0 }; + struct vxlan_rdst *rdst; + struct vxlan_fdb *f; + int rc = 0; + + if (is_multicast_ether_addr(mac) || + is_zero_ether_addr(mac)) + return -EINVAL; + + ether_addr_copy(eth_addr, mac); + + rcu_read_lock(); + + f = __vxlan_find_mac(vxlan, eth_addr, vni); + if (!f) { + rc = -ENOENT; + goto out; + } + + rdst = first_remote_rcu(f); + + memset(fdb_info, 0, sizeof(*fdb_info)); + fdb_info->info.dev = dev; + fdb_info->remote_ip = rdst->remote_ip; + fdb_info->remote_port = rdst->remote_port; + fdb_info->remote_vni = rdst->remote_vni; + fdb_info->remote_ifindex = rdst->remote_ifindex; + fdb_info->vni = vni; + fdb_info->offloaded = rdst->offloaded; + ether_addr_copy(fdb_info->eth_addr, mac); + +out: + rcu_read_unlock(); + return rc; +} +EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc); + /* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, union vxlan_addr *ip, __be16 port, __be32 vni, @@ -533,6 +593,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd->remote_ip = *ip; rd->remote_port = port; + rd->offloaded = false; rd->remote_vni = vni; rd->remote_ifindex = ifindex; @@ -782,12 +843,15 @@ static void vxlan_fdb_free(struct rcu_head *head) static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, bool do_notify) { + struct vxlan_rdst *rd; + netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); --vxlan->addrcnt; if (do_notify) - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); + list_for_each_entry(rd, &f->remotes, list) + vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH); hlist_del_rcu(&f->hlist); call_rcu(&f->rcu, vxlan_fdb_free); @@ -2198,11 +2262,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; - if (skb_dst(skb)) { - int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); @@ -2239,11 +2299,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - if (skb_dst(skb)) { - int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; - - skb_dst_update_pmtu(skb, mtu); - } + skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM); tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); @@ -3761,6 +3817,51 @@ static struct notifier_block vxlan_notifier_block __read_mostly = { .notifier_call = vxlan_netdevice_event, }; +static void +vxlan_fdb_offloaded_set(struct net_device *dev, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *rdst; + struct vxlan_fdb *f; + + spin_lock_bh(&vxlan->hash_lock); + + f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + if (!f) + goto out; + + rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip, + fdb_info->remote_port, + fdb_info->remote_vni, + fdb_info->remote_ifindex); + if (!rdst) + goto out; + + rdst->offloaded = fdb_info->offloaded; + +out: + spin_unlock_bh(&vxlan->hash_lock); +} + +static int vxlan_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + + switch (event) { + case SWITCHDEV_VXLAN_FDB_OFFLOADED: + vxlan_fdb_offloaded_set(dev, ptr); + break; + } + + return 0; +} + +static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = { + .notifier_call = vxlan_switchdev_event, +}; + static __net_init int vxlan_init_net(struct net *net) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); @@ -3834,11 +3935,17 @@ static int __init vxlan_init_module(void) if (rc) goto out2; - rc = rtnl_link_register(&vxlan_link_ops); + rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block); if (rc) goto out3; + rc = rtnl_link_register(&vxlan_link_ops); + if (rc) + goto out4; + return 0; +out4: + unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); out3: unregister_netdevice_notifier(&vxlan_notifier_block); out2: @@ -3851,6 +3958,7 @@ late_initcall(vxlan_init_module); static void __exit vxlan_cleanup_module(void) { rtnl_link_unregister(&vxlan_link_ops); + unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); unregister_netdevice_notifier(&vxlan_notifier_block); unregister_pernet_subsys(&vxlan_net_ops); /* rcu_barrier() is called by netns */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dd8ec1dd9219..6bb9908bf46f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3143,8 +3143,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) } mutex_lock(&ns->ctrl->subsys->lock); - nvme_mpath_clear_current_path(ns); list_del_rcu(&ns->siblings); + nvme_mpath_clear_current_path(ns); mutex_unlock(&ns->ctrl->subsys->lock); down_write(&ns->ctrl->namespaces_rwsem); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7f01f6f60b87..d0b7dd8fb184 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); + int ret; + + ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); + if (ret && armpmu->filter_match) + return armpmu->filter_match(event); + + return ret; } static ssize_t armpmu_cpumask_show(struct device *dev, diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 
01b0e2bb3319..2012551d93e0 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -24,6 +24,8 @@ #include <linux/slab.h> #include <linux/timekeeping.h> +#include <linux/nospec.h> + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -248,6 +250,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = ops->pin_config[pin_index]; @@ -266,6 +269,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index bc03b0a690b4..9ede35cecb12 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -310,17 +310,17 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); @@ -355,7 +355,6 @@ static void acm_ctrl_irq(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - acm->nb_index = 0; dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); @@ -1642,6 +1641,7 @@ static int acm_pre_reset(struct usb_interface *intf) struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); + acm->nb_index = 0; /* pending control transfers are lost */ return 0; } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 244417d0dfd1..ffccd40ea67d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1474,8 +1474,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1505,6 +1503,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index ca8a4b53c59f..1074cb82ec17 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -221,6 +221,8 @@ #include <linux/usb/gadget.h> #include <linux/usb/composite.h> +#include <linux/nospec.h> + #include "configfs.h" @@ -3152,6 +3154,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) 
return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 722860eb5a91..51dd8e00c4f8 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -179,10 +179,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) xhci->quirks |= XHCI_SSIC_PORT_UNUSED; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) xhci->quirks |= XHCI_INTEL_USB_ROLE_SW; - } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c index 1fb3dd0f1dfa..277de96181f9 100644 --- a/drivers/usb/roles/intel-xhci-usb-role-switch.c +++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -161,6 +161,8 @@ static int intel_xhci_usb_remove(struct platform_device *pdev) { struct intel_xhci_usb_data *data = platform_get_drvdata(pdev); + pm_runtime_disable(&pdev->dev); + usb_role_switch_unregister(data->role_sw); return 0; } diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index d11f3f8dad40..1e592ec94ba4 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -318,8 +318,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, struct vhci_hcd *vhci_hcd; struct vhci *vhci; int retval = 0; - int rhport; + int rhport = -1; unsigned long flags; + bool invalid_rhport = false; u32 prev_port_status[VHCI_HC_PORTS]; @@ -334,9 +335,19 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue, wIndex); - if (wIndex > VHCI_HC_PORTS) - pr_err("invalid port number %d\n", wIndex); - rhport = wIndex - 1; + /* + * wIndex can be 0 for some request types (typeReq). rhport is + * in valid range when wIndex >= 1 and < VHCI_HC_PORTS. + * + * Reference port_status[] only with valid rhport when + * invalid_rhport is false. 
+ */ + if (wIndex < 1 || wIndex > VHCI_HC_PORTS) { + invalid_rhport = true; + if (wIndex > VHCI_HC_PORTS) + pr_err("invalid port number %d\n", wIndex); + } else + rhport = wIndex - 1; vhci_hcd = hcd_to_vhci_hcd(hcd); vhci = vhci_hcd->vhci; @@ -345,8 +356,9 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, /* store old status and compare now and old later */ if (usbip_dbg_flag_vhci_rh) { - memcpy(prev_port_status, vhci_hcd->port_status, - sizeof(prev_port_status)); + if (!invalid_rhport) + memcpy(prev_port_status, vhci_hcd->port_status, + sizeof(prev_port_status)); } switch (typeReq) { @@ -354,8 +366,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, usbip_dbg_vhci_rh(" ClearHubFeature\n"); break; case ClearPortFeature: - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } switch (wValue) { case USB_PORT_FEAT_SUSPEND: if (hcd->speed == HCD_USB3) { @@ -415,9 +429,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; case GetPortStatus: usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex); - if (wIndex < 1) { + if (invalid_rhport) { pr_err("invalid port number %d\n", wIndex); retval = -EPIPE; + goto error; } /* we do not care about resume. */ @@ -513,16 +528,20 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; } - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } vhci_hcd->port_status[rhport] |= USB_PORT_STAT_SUSPEND; break; case USB_PORT_FEAT_POWER: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_POWER\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) vhci_hcd->port_status[rhport] |= USB_SS_PORT_STAT_POWER; else @@ -531,8 +550,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_BH_PORT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_BH_PORT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* Applicable only for USB3.0 hub */ if (hcd->speed != HCD_USB3) { pr_err("USB_PORT_FEAT_BH_PORT_RESET req not " @@ -543,8 +564,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_RESET: usbip_dbg_vhci_rh( " SetPortFeature: USB_PORT_FEAT_RESET\n"); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { vhci_hcd->port_status[rhport] = 0; @@ -565,8 +588,10 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" SetPortFeature: default %d\n", wValue); - if (rhport < 0) + if (invalid_rhport) { + pr_err("invalid port number %d\n", wIndex); goto error; + } if (hcd->speed == HCD_USB3) { if ((vhci_hcd->port_status[rhport] & USB_SS_PORT_STAT_POWER) != 0) { @@ -608,7 +633,7 @@ error: if (usbip_dbg_flag_vhci_rh) { pr_debug("port %d\n", rhport); /* Only dump valid port status */ - if (rhport >= 0) { + if (!invalid_rhport) { dump_port_status_diff(prev_port_status[rhport], vhci_hcd->port_status[rhport], hcd->speed == HCD_USB3); @@ -618,8 +643,10 @@ error: spin_unlock_irqrestore(&vhci->lock, flags); - if ((vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) + if (!invalid_rhport && + (vhci_hcd->port_status[rhport] & PORT_C_MASK) != 0) { usb_hcd_poll_rh_status(hcd); + } return 
retval; } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 35f2ae30f31f..77a83790a31f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -690,8 +690,6 @@ static void afs_process_async_call(struct work_struct *work) } if (call->state == AFS_CALL_COMPLETE) { - call->reply[0] = NULL; - /* We have two refs to release - one from the alloc and one * queued with the work item - and we can't just deallocate the * call because the work item may be queued again. diff --git a/fs/afs/server.c b/fs/afs/server.c index 2f306c0cc4ee..1d329e6981d5 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -199,11 +199,9 @@ static struct afs_server *afs_install_server(struct afs_net *net, write_sequnlock(&net->fs_addr_lock); ret = 0; - goto out; exists: afs_get_server(server); -out: write_sequnlock(&net->fs_lock); return server; } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index af2b17b21b94..95983c744164 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -343,7 +343,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); diff --git a/fs/dax.c b/fs/dax.c @@ -666,6 +666,8 @@ struct page *dax_layout_busy_page(struct address_space *mapping) while (index < end && pagevec_lookup_entries(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { + pgoff_t nr_pages = 1; + for (i = 0; i < pagevec_count(&pvec); i++) { struct page *pvec_ent = pvec.pages[i]; void *entry; @@ -680,8 +682,15 @@ struct page *dax_layout_busy_page(struct address_space *mapping) xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (entry) + if (entry) { page = dax_busy_page(entry); + /* + * Account for multi-order entries at + * the end of the pagevec. + */ + if (i + 1 >= pagevec_count(&pvec)) + nr_pages = 1UL << dax_radix_order(entry); + } put_unlocked_mapping_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); if (page) @@ -696,7 +705,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); - index++; + index += nr_pages; if (page) break; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index defc2168de91..f58c0cacc531 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -682,6 +682,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 83bfe04456b6..c550512ce335 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -70,20 +70,7 @@ void fscache_free_cookie(struct fscache_cookie *cookie) } /* - * initialise an cookie jar slab element prior to any use - */ -void fscache_cookie_init_once(void *_cookie) -{ - struct fscache_cookie *cookie = _cookie; - - memset(cookie, 0, sizeof(*cookie)); - spin_lock_init(&cookie->lock); - spin_lock_init(&cookie->stores_lock); - INIT_HLIST_HEAD(&cookie->backing_objects); -} - -/* - * Set the index key in a cookie. The cookie struct has space for a 12-byte + * Set the index key in a cookie. The cookie struct has space for a 16-byte * key plus length and hash, but if that's not big enough, it's instead a * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then * the key data.
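The fscache_set_key() hunk below is worth pausing on: the hash loop walks the key in 32-bit words, so the backing buffer must be a whole number of zero-initialised words, or the final iteration reads past (or hashes uninitialised) memory whenever the key length is not a multiple of four. That is what the switch to kcalloc()/DIV_ROUND_UP() and kmem_cache_zalloc() buys. A minimal userspace sketch of the pattern, assuming nothing beyond libc; toy_key_hash() and its salt parameter are hypothetical names, not a kernel API:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Hash a variable-length key word by word. The buffer is sized in
 * whole 32-bit words and zero-filled first, so the partially used
 * final word hashes deterministically instead of reading junk. */
static uint32_t toy_key_hash(const void *key, size_t len, uintptr_t salt)
{
    size_t nwords = DIV_ROUND_UP(len, sizeof(uint32_t));
    uint32_t *buf = calloc(nwords, sizeof(uint32_t));
    uint64_t h = salt + len;
    size_t i;

    if (!buf)
        return 0;
    memcpy(buf, key, len);       /* trailing bytes stay zero */
    for (i = 0; i < nwords; i++) /* never steps past the words */
        h += buf[i];
    free(buf);
    return (uint32_t)(h ^ (h >> 32));
}

int main(void)
{
    const char k[] = "example-key"; /* 11 bytes: not word-aligned */
    (void)toy_key_hash(k, sizeof(k) - 1, 0x1234);
    return 0;
}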
@@ -93,20 +80,18 @@ static int fscache_set_key(struct fscache_cookie *cookie, { unsigned long long h; u32 *buf; + int bufs; int i; - cookie->key_len = index_key_len; + bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); if (index_key_len > sizeof(cookie->inline_key)) { - buf = kzalloc(index_key_len, GFP_KERNEL); + buf = kcalloc(bufs, sizeof(*buf), GFP_KERNEL); if (!buf) return -ENOMEM; cookie->key = buf; } else { buf = (u32 *)cookie->inline_key; - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; } memcpy(buf, index_key, index_key_len); @@ -116,7 +101,8 @@ static int fscache_set_key(struct fscache_cookie *cookie, */ h = (unsigned long)cookie->parent; h += index_key_len + cookie->type; - for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++) + + for (i = 0; i < bufs; i++) h += buf[i]; cookie->key_hash = h ^ (h >> 32); @@ -161,7 +147,7 @@ struct fscache_cookie *fscache_alloc_cookie( struct fscache_cookie *cookie; /* allocate and initialise a cookie */ - cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); + cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); if (!cookie) return NULL; @@ -192,6 +178,9 @@ struct fscache_cookie *fscache_alloc_cookie( cookie->netfs_data = netfs_data; cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); cookie->type = def->type; + spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); + INIT_HLIST_HEAD(&cookie->backing_objects); /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f83328a7f048..d6209022e965 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -51,7 +51,6 @@ extern struct fscache_cache *fscache_select_cache_for_object( extern struct kmem_cache *fscache_cookie_jar; extern void fscache_free_cookie(struct fscache_cookie *); -extern void fscache_cookie_init_once(void *); extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, const struct fscache_cookie_def *, const void *, size_t, diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 7dce110bf17d..30ad89db1efc 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -143,9 +143,7 @@ static int __init fscache_init(void) fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", sizeof(struct fscache_cookie), - 0, - 0, - fscache_cookie_init_once); + 0, 0, NULL); if (!fscache_cookie_jar) { pr_notice("Failed to allocate a cookie jar\n"); ret = -ENOMEM; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3c159a7f9a9e..84544a4f012d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -975,10 +975,6 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, { struct gfs2_inode *ip = GFS2_I(inode); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); } @@ -1061,7 +1057,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } } release_metapath(&mp); - if (gfs2_is_jdata(ip)) + if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8e712b614e6e..933aac5da193 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -96,7 +96,9 @@ struct ocfs2_unblock_ctl { }; /* Lockdep class keys */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; +#endif static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res 
*lockres, int new_level); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index bf000c8aeffb..fec62e9dfbe6 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2337,8 +2337,8 @@ late_initcall(ubifs_init); static void __exit ubifs_exit(void) { - WARN_ON(list_empty(&ubifs_infos)); - WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + WARN_ON(!list_empty(&ubifs_infos)); + WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0); dbg_debugfs_exit(); ubifs_compressors_exit(); diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index da9d95a19580..1e713154f00e 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -153,6 +153,17 @@ struct __drm_planes_state { struct __drm_crtcs_state { struct drm_crtc *ptr; struct drm_crtc_state *state, *old_state, *new_state; + + /** + * @commit: + * + * A reference to the CRTC commit object that is kept for use by + * drm_atomic_helper_wait_for_flip_done() after + * drm_atomic_helper_commit_hw_done() is called. This ensures that a + * concurrent commit won't free a commit object that is still in use. + */ + struct drm_crtc_commit *commit; + s32 __user *out_fence_ptr; u64 last_vblank_count; }; diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b25d12ef120a..e3c404833115 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -214,9 +214,9 @@ struct detailed_timing { #define DRM_EDID_HDMI_DC_Y444 (1 << 3) /* YCBCR 420 deep color modes */ -#define DRM_EDID_YCBCR420_DC_48 (1 << 6) -#define DRM_EDID_YCBCR420_DC_36 (1 << 5) -#define DRM_EDID_YCBCR420_DC_30 (1 << 4) +#define DRM_EDID_YCBCR420_DC_48 (1 << 2) +#define DRM_EDID_YCBCR420_DC_36 (1 << 1) +#define DRM_EDID_YCBCR420_DC_30 (1 << 0) #define DRM_EDID_YCBCR420_DC_MASK (DRM_EDID_YCBCR420_DC_48 | \ DRM_EDID_YCBCR420_DC_36 | \ DRM_EDID_YCBCR420_DC_30) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 99c19b06d9a4..fdcb45999b26 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); + pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 472fa4d4ea62..7361cd3fddc1 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,6 +31,7 @@ #define PHY_ID_KSZ8081 0x00221560 #define PHY_ID_KSZ8061 0x00221570 #define PHY_ID_KSZ9031 0x00221620 +#define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 0ef6138eca49..31a750570c38 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -61,6 +61,7 @@ struct mlx5_core_cq { int reset_notify_added; struct list_head reset_notify; struct mlx5_eq *eq; + u16 uid; }; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e9b502d5bcc1..b4c0457fbebd 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1124,6 +1124,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, 
flow_table_properties_nic_transmit.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap) + #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b75796cac23..31460eeb6fe0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -133,6 +133,7 @@ enum { MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0x5002, + MLX5_REG_PPLM = 0x5023, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, @@ -163,10 +164,7 @@ enum mlx5_dcbx_oper_mode { }; enum mlx5_dct_atomic_mode { - MLX5_ATOMIC_MODE_DCT_OFF = 20, - MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF, + MLX5_ATOMIC_MODE_DCT_CX = 2, }; enum { @@ -360,7 +358,7 @@ struct mlx5_frag_buf { }; struct mlx5_frag_buf_ctrl { - struct mlx5_frag_buf frag_buf; + struct mlx5_buf_list *frags; u32 sz_m1; u16 frag_sz_m1; u16 strides_offset; @@ -477,6 +475,7 @@ struct mlx5_core_srq { atomic_t refcount; struct completion free; + u16 uid; }; struct mlx5_eq_table { @@ -996,10 +995,12 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } -static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, u16 strides_offset, struct mlx5_frag_buf_ctrl *fbc) { + fbc->frags = frags; fbc->log_stride = log_stride; fbc->log_sz = log_sz; fbc->sz_m1 = (1 << fbc->log_sz) - 1; @@ -1008,18 +1009,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, fbc->strides_offset = strides_offset; } -static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, struct mlx5_frag_buf_ctrl *fbc) { - mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc); -} - -static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, - void *cqc) -{ - mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz), - MLX5_GET(cqc, cqc, log_cq_size), - fbc); + mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc); } static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, @@ -1030,8 +1024,15 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, ix += fbc->strides_offset; frag = ix >> fbc->log_frag_strides; - return fbc->frag_buf.frags[frag].buf + - ((fbc->frag_sz_m1 & ix) << fbc->log_stride); + return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); +} + +static inline u32 +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) +{ + u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1; + + return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); } int mlx5_cmd_init(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 804516e4f483..5660f07d3be0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -45,7 +45,8 @@ enum { }; enum { - MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), }; #define LEFTOVERS_RULE_NUM 2 @@ -91,7 +92,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - struct 
mlx5_fc *counter; + u32 counter_id; struct { u16 num; u16 vhca_id; @@ -101,6 +102,8 @@ struct mlx5_flow_destination { }; struct mlx5_flow_namespace * +mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n); +struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); struct mlx5_flow_namespace * @@ -155,20 +158,28 @@ struct mlx5_fs_vlan { #define MLX5_FS_VLAN_DEPTH 2 +enum { + FLOW_ACT_HAS_TAG = BIT(0), + FLOW_ACT_NO_APPEND = BIT(1), +}; + struct mlx5_flow_act { u32 action; - bool has_flow_tag; u32 flow_tag; - u32 encap_id; + u32 reformat_id; u32 modify_id; uintptr_t esp_id; + u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; }; #define MLX5_DECLARE_FLOW_ACT(name) \ - struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - MLX5_FS_DEFAULT_FLOW_TAG, 0, 0} + struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \ + .reformat_id = 0, \ + .modify_id = 0, \ + .flags = 0, } /* Single destination per rule. * Group ID is implied by the match criteria. @@ -185,15 +196,30 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_flow_destination *new_dest, struct mlx5_flow_destination *old_dest); -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, u64 *packets, u64 *bytes); +u32 mlx5_fc_id(struct mlx5_fc *counter); int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + u32 modify_header_id); + +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id); +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id); + #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6e8a882052b1..dbff9ff28f2c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -243,8 +243,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, @@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 encap[0x1]; + u8 reformat[0x1]; u8 decap[0x1]; u8 reserved_at_9[0x1]; u8 pop_vlan[0x1]; @@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_c[0x1]; u8 pop_vlan_2[0x1]; u8 push_vlan_2[0x1]; - u8 reserved_at_f[0x11]; - + u8 reformat_and_vlan_action[0x1]; + u8 
reserved_at_10[0x2]; + u8 reformat_l3_tunnel_to_l2[0x1]; + u8 reformat_l2_to_l3_tunnel[0x1]; + u8 reformat_and_modify_action[0x1]; + u8 reserved_at_14[0xb]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1fd]; + u8 reserved_at_3[0x1d]; + u8 encap_general_header[0x1]; + u8 reserved_at_21[0xa]; + u8 log_max_packet_reformat_context[0x5]; + u8 reserved_at_30[0x6]; + u8 max_encap_header_size[0xa]; + u8 reserved_at_40[0x1c0]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; @@ -574,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_0[0x1c]; u8 fdb_multi_path_to_table[0x1]; - u8 reserved_at_1d[0x1e3]; + u8 reserved_at_1d[0x1]; + u8 multi_fdb_encap[0x1]; + u8 reserved_at_1e[0x1e1]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; @@ -599,7 +611,7 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vxlan_encap_decap[0x1]; u8 nvgre_encap_decap[0x1]; u8 reserved_at_22[0x9]; - u8 log_max_encap_headers[0x5]; + u8 log_max_packet_reformat_context[0x5]; u8 reserved_2b[0x6]; u8 max_encap_header_size[0xa]; @@ -996,7 +1008,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 umr_modify_atomic_disabled[0x1]; u8 umr_indirect_mkey_disabled[0x1]; u8 umr_fence[0x2]; - u8 reserved_at_20c[0x3]; + u8 dc_req_scat_data_cqe[0x1]; + u8 reserved_at_20d[0x2]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -1281,7 +1294,9 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x3]; + u8 dbr_umem_valid[0x1]; + u8 wq_umem_valid[0x1]; + u8 reserved_at_122[0x1]; u8 log_hairpin_num_packets[0x5]; u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; @@ -2355,7 +2370,10 @@ struct mlx5_ifc_qpc_bits { u8 dc_access_key[0x40]; - u8 reserved_at_680[0xc0]; + u8 reserved_at_680[0x3]; + u8 dbr_umem_valid[0x1]; + + u8 reserved_at_684[0xbc]; }; struct mlx5_ifc_roce_addr_layout_bits { @@ -2395,7 +2413,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, - MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80, @@ -2428,7 +2446,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 modify_header_id[0x20]; @@ -2455,7 +2473,7 @@ struct mlx5_ifc_xrc_srqc_bits { u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 reserved_at_22[0x1]; + u8 dbr_umem_valid[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; @@ -2550,8 +2568,8 @@ enum { }; enum { - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST = 0x2, }; struct mlx5_ifc_tirc_bits { @@ -3119,7 +3137,9 @@ enum { struct mlx5_ifc_cqc_bits { u8 status[0x4]; - u8 reserved_at_4[0x4]; + u8 reserved_at_4[0x2]; + u8 dbr_umem_valid[0x1]; + u8 reserved_at_7[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; @@ -3386,7 +3406,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits { struct mlx5_ifc_sqerr2rts_qp_in_bits { u8 opcode[0x10]; - u8 
reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3416,7 +3436,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits { struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3621,7 +3641,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits { struct mlx5_ifc_rts2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3651,7 +3671,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits { struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3681,7 +3701,7 @@ struct mlx5_ifc_rst2init_qp_out_bits { struct mlx5_ifc_rst2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -4804,19 +4824,19 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_encap_header_in_bits { +struct mlx5_ifc_packet_reformat_context_in_bits { u8 reserved_at_0[0x5]; - u8 header_type[0x3]; + u8 reformat_type[0x3]; u8 reserved_at_8[0xe]; - u8 encap_header_size[0xa]; + u8 reformat_data_size[0xa]; u8 reserved_at_20[0x10]; - u8 encap_header[2][0x8]; + u8 reformat_data[2][0x8]; - u8 more_encap_header[0][0x8]; + u8 more_reformat_data[0][0x8]; }; -struct mlx5_ifc_query_encap_header_out_bits { +struct mlx5_ifc_query_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4824,33 +4844,41 @@ struct mlx5_ifc_query_encap_header_out_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header[0]; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; }; -struct mlx5_ifc_query_encap_header_in_bits { +struct mlx5_ifc_query_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0xa0]; }; -struct mlx5_ifc_alloc_encap_header_out_bits { +struct mlx5_ifc_alloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_alloc_encap_header_in_bits { +enum { + MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, + MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, + MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, +}; + +struct mlx5_ifc_alloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -4859,10 +4887,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context; }; -struct mlx5_ifc_dealloc_encap_header_out_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4871,14 +4899,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_dealloc_encap_header_in_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_60[0x20]; }; @@ -5176,7 +5204,7 @@ struct mlx5_ifc_qp_2rst_out_bits { struct mlx5_ifc_qp_2rst_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ 
-5198,7 +5226,7 @@ struct mlx5_ifc_qp_2err_out_bits { struct mlx5_ifc_qp_2err_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5298,7 +5326,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits { struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5337,7 +5365,7 @@ struct mlx5_ifc_modify_tir_out_bits { struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5365,7 +5393,7 @@ struct mlx5_ifc_modify_sq_out_bits { struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5438,7 +5466,7 @@ struct mlx5_ifc_rqt_bitmask_bits { struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5472,7 +5500,7 @@ enum { struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5508,7 +5536,7 @@ struct mlx5_ifc_rmp_bitmask_bits { struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5613,7 +5641,7 @@ enum { struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5625,7 +5653,10 @@ struct mlx5_ifc_modify_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -5773,7 +5804,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits { struct mlx5_ifc_init2rtr_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5803,7 +5834,7 @@ struct mlx5_ifc_init2init_qp_out_bits { struct mlx5_ifc_init2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5902,7 +5933,7 @@ struct mlx5_ifc_drain_dct_out_bits { struct mlx5_ifc_drain_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5946,7 +5977,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits { struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5970,7 +6001,7 @@ struct mlx5_ifc_destroy_xrq_out_bits { struct mlx5_ifc_destroy_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5992,7 +6023,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits { struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6014,7 +6045,7 @@ struct mlx5_ifc_destroy_tis_out_bits { struct mlx5_ifc_destroy_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6036,7 +6067,7 @@ struct mlx5_ifc_destroy_tir_out_bits { struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6058,7 +6089,7 @@ struct mlx5_ifc_destroy_srq_out_bits { struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 
op_mod[0x10]; @@ -6080,7 +6111,7 @@ struct mlx5_ifc_destroy_sq_out_bits { struct mlx5_ifc_destroy_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6126,7 +6157,7 @@ struct mlx5_ifc_destroy_rqt_out_bits { struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6148,7 +6179,7 @@ struct mlx5_ifc_destroy_rq_out_bits { struct mlx5_ifc_destroy_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6192,7 +6223,7 @@ struct mlx5_ifc_destroy_rmp_out_bits { struct mlx5_ifc_destroy_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6214,7 +6245,7 @@ struct mlx5_ifc_destroy_qp_out_bits { struct mlx5_ifc_destroy_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6366,7 +6397,7 @@ struct mlx5_ifc_destroy_dct_out_bits { struct mlx5_ifc_destroy_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6388,7 +6419,7 @@ struct mlx5_ifc_destroy_cq_out_bits { struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6491,7 +6522,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits { struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6579,7 +6610,7 @@ struct mlx5_ifc_dealloc_pd_out_bits { struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6625,7 +6656,7 @@ struct mlx5_ifc_create_xrq_out_bits { struct mlx5_ifc_create_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6649,7 +6680,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits { struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6658,7 +6689,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits { struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + u8 xrc_srq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -6677,7 +6710,7 @@ struct mlx5_ifc_create_tis_out_bits { struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6701,7 +6734,7 @@ struct mlx5_ifc_create_tir_out_bits { struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6725,7 +6758,7 @@ struct mlx5_ifc_create_srq_out_bits { struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6753,7 +6786,7 @@ struct mlx5_ifc_create_sq_out_bits { struct mlx5_ifc_create_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6807,7 +6840,7 @@ struct mlx5_ifc_create_rqt_out_bits { struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6831,7 +6864,7 @@ struct mlx5_ifc_create_rq_out_bits { struct 
mlx5_ifc_create_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6855,7 +6888,7 @@ struct mlx5_ifc_create_rmp_out_bits { struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6879,7 +6912,7 @@ struct mlx5_ifc_create_qp_out_bits { struct mlx5_ifc_create_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6892,7 +6925,10 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x80]; + u8 reserved_at_800[0x60]; + + u8 wq_umem_valid[0x1]; + u8 reserved_at_861[0x1f]; u8 pas[0][0x40]; }; @@ -6954,7 +6990,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 reserved_at_40[0x20]; u8 pg_access[0x1]; - u8 reserved_at_61[0x1f]; + u8 mkey_umem_valid[0x1]; + u8 reserved_at_62[0x1e]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; @@ -6980,7 +7017,7 @@ struct mlx5_ifc_create_flow_table_out_bits { }; struct mlx5_ifc_flow_table_context_bits { - u8 encap_en[0x1]; + u8 reformat_en[0x1]; u8 decap_en[0x1]; u8 reserved_at_2[0x2]; u8 table_miss_action[0x4]; @@ -7122,7 +7159,7 @@ struct mlx5_ifc_create_dct_out_bits { struct mlx5_ifc_create_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7148,7 +7185,7 @@ struct mlx5_ifc_create_cq_out_bits { struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7157,7 +7194,10 @@ struct mlx5_ifc_create_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x60]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2e1[0x59f]; u8 pas[0][0x40]; }; @@ -7205,7 +7245,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits { struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7256,7 +7296,7 @@ enum { struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7284,7 +7324,7 @@ enum { struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7332,7 +7372,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits { struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7420,7 +7460,7 @@ struct mlx5_ifc_alloc_pd_out_bits { struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7788,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits { struct mlx5_ifc_pplm_reg_bits { u8 reserved_at_0[0x8]; - u8 local_port[0x8]; - u8 reserved_at_10[0x10]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0x20]; - u8 port_profile_mode[0x8]; - u8 static_port_profile[0x8]; - u8 active_port_profile[0x8]; - u8 reserved_at_58[0x8]; + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_at_58[0x8]; - u8 retransmission_active[0x8]; - u8 fec_mode_active[0x18]; + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; - u8 reserved_at_80[0x20]; + u8 rs_fec_correction_bypass_cap[0x4]; + u8 reserved_at_84[0x8]; + u8 fec_override_cap_56g[0x4]; + u8 fec_override_cap_100g[0x4]; + u8 
fec_override_cap_50g[0x4]; + u8 fec_override_cap_25g[0x4]; + u8 fec_override_cap_10g_40g[0x4]; + + u8 rs_fec_correction_bypass_admin[0x4]; + u8 reserved_at_a4[0x8]; + u8 fec_override_admin_56g[0x4]; + u8 fec_override_admin_100g[0x4]; + u8 fec_override_admin_50g[0x4]; + u8 fec_override_admin_25g[0x4]; + u8 fec_override_admin_10g_40g[0x4]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -8086,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 rx_icrc_encapsulated_counter[0x1]; u8 reserved_at_6e[0x8]; u8 pfcc_mask[0x1]; - u8 reserved_at_77[0x4]; + u8 reserved_at_77[0x3]; + u8 per_lane_error_counters[0x1]; u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; @@ -8097,7 +8152,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits { struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_32[0x20]; + + u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 pplm[0x1]; + u8 port_access_reg_cap_mask_34_to_32[0x3]; u8 port_access_reg_cap_mask_31_to_13[0x13]; u8 pbmc[0x1]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4778d41085d4..fbe322c966bc 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -471,6 +471,7 @@ struct mlx5_core_qp { int qpn; struct mlx5_rsc_debug *dbg; int pid; + u16 uid; }; struct mlx5_core_dct { diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 24ff23e27c8a..1b1f3c20c6a3 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -61,6 +61,7 @@ struct mlx5_srq_attr { u32 tm_next_tag; u32 tm_hw_phase_cnt; u32 tm_sw_phase_cnt; + u16 uid; }; struct mlx5_core_dev; diff --git a/include/linux/module.h b/include/linux/module.h index f807f15bebbe..e19ae08c7fb8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -20,6 +20,7 @@ #include <linux/export.h> #include <linux/rbtree_latch.h> #include <linux/error-injection.h> +#include <linux/tracepoint-defs.h> #include <linux/percpu.h> #include <asm/module.h> @@ -430,7 +431,7 @@ struct module { #ifdef CONFIG_TRACEPOINTS unsigned int num_tracepoints; - struct tracepoint * const *tracepoints_ptrs; + tracepoint_ptr_t *tracepoints_ptrs; #endif #ifdef HAVE_JUMP_LABEL struct jump_entry *jump_entries; diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index ecf7dab81e9e..c6000046c966 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, const struct list_head *nf_osf_fingers); const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers); + const struct list_head *nf_osf_fingers, + const int ttl_check); #endif /* _NFOSF_H */ diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 3ef82d3a78db..676f1ff161a9 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -31,8 +31,6 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; - - struct work_struct cleanup_work; }; struct netpoll_info { @@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); -void __netpoll_free_async(struct netpoll *np); +void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); void netpoll_send_skb_on_dev(struct 
netpoll *np, struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 10f92e1d8e7b..bf309ff6f244 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -99,6 +99,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dee3c9c744f7..a47321a0d572 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -667,15 +667,35 @@ enum qed_link_mode_bits { QED_LM_Autoneg_BIT = BIT(1), QED_LM_Asym_Pause_BIT = BIT(2), QED_LM_Pause_BIT = BIT(3), - QED_LM_1000baseT_Half_BIT = BIT(4), - QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_1000baseT_Full_BIT = BIT(4), + QED_LM_10000baseT_Full_BIT = BIT(5), QED_LM_10000baseKR_Full_BIT = BIT(6), QED_LM_20000baseKR2_Full_BIT = BIT(7), QED_LM_25000baseKR_Full_BIT = BIT(8), QED_LM_40000baseLR4_Full_BIT = BIT(9), QED_LM_50000baseKR2_Full_BIT = BIT(10), QED_LM_100000baseKR4_Full_BIT = BIT(11), - QED_LM_COUNT = 11 + QED_LM_2500baseX_Full_BIT = BIT(12), + QED_LM_Backplane_BIT = BIT(13), + QED_LM_1000baseKX_Full_BIT = BIT(14), + QED_LM_10000baseKX4_Full_BIT = BIT(15), + QED_LM_10000baseR_FEC_BIT = BIT(16), + QED_LM_40000baseKR4_Full_BIT = BIT(17), + QED_LM_40000baseCR4_Full_BIT = BIT(18), + QED_LM_40000baseSR4_Full_BIT = BIT(19), + QED_LM_25000baseCR_Full_BIT = BIT(20), + QED_LM_25000baseSR_Full_BIT = BIT(21), + QED_LM_50000baseCR2_Full_BIT = BIT(22), + QED_LM_100000baseSR4_Full_BIT = BIT(23), + QED_LM_100000baseCR4_Full_BIT = BIT(24), + QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25), + QED_LM_50000baseSR2_Full_BIT = BIT(26), + QED_LM_1000baseX_Full_BIT = BIT(27), + QED_LM_10000baseCR_Full_BIT = BIT(28), + QED_LM_10000baseSR_Full_BIT = BIT(29), + QED_LM_10000baseLR_Full_BIT = BIT(30), + QED_LM_10000baseLRM_Full_BIT = BIT(31), + QED_LM_COUNT = 32 }; struct qed_link_params { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 119d092c6b13..0ba687454267 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); + /* fall through */ case 24: diffs |= __it_diff(a, b, 64); + /* fall through */ case 16: diffs |= __it_diff(a, b, 64); + /* fall through */ case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); + /* fall through */ case 20: diffs |= __it_diff(a, b, 64); + /* fall through */ case 12: diffs |= __it_diff(a, b, 64); + /* fall through */ case 4: diffs |= __it_diff(a, b, 32); break; } diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 22c5a46e9693..49ba9cde7e4b 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,6 +35,12 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef const int tracepoint_ptr_t; +#else +typedef struct tracepoint * const tracepoint_ptr_t; +#endif + struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 041f7e56a289..538ba1a58f5b 100644 --- 
a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -99,6 +99,29 @@ extern void syscall_unregfunc(void); #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return offset_to_ptr(p); +} + +#define __TRACEPOINT_ENTRY(name) \ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4 \n" \ + " .long __tracepoint_" #name " - . \n" \ + " .previous \n") +#else +static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) +{ + return *p; +} + +#define __TRACEPOINT_ENTRY(name) \ + static tracepoint_ptr_t __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + #endif /* _LINUX_TRACEPOINT_H */ /* @@ -253,19 +276,6 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } -#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS -#define __TRACEPOINT_ENTRY(name) \ - asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ - " .balign 4 \n" \ - " .long __tracepoint_" #name " - . \n" \ - " .previous \n") -#else -#define __TRACEPOINT_ENTRY(name) \ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name -#endif - /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 3555440e14fc..093aedebdf0c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -277,12 +277,19 @@ struct l2cap_conn_rsp { #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 -#define L2CAP_CR_AUTHENTICATION 0x0005 -#define L2CAP_CR_AUTHORIZATION 0x0006 -#define L2CAP_CR_BAD_KEY_SIZE 0x0007 -#define L2CAP_CR_ENCRYPTION 0x0008 -#define L2CAP_CR_INVALID_SCID 0x0009 -#define L2CAP_CR_SCID_IN_USE 0x000A +#define L2CAP_CR_INVALID_SCID 0x0006 +#define L2CAP_CR_SCID_IN_USE 0x0007 + +/* credit based connect results */ +#define L2CAP_CR_LE_SUCCESS 0x0000 +#define L2CAP_CR_LE_BAD_PSM 0x0002 +#define L2CAP_CR_LE_NO_MEM 0x0004 +#define L2CAP_CR_LE_AUTHENTICATION 0x0005 +#define L2CAP_CR_LE_AUTHORIZATION 0x0006 +#define L2CAP_CR_LE_BAD_KEY_SIZE 0x0007 +#define L2CAP_CR_LE_ENCRYPTION 0x0008 +#define L2CAP_CR_LE_INVALID_SCID 0x0009 +#define L2CAP_CR_LE_SCID_IN_USE 0X000A /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 diff --git a/include/net/dst.h b/include/net/dst.h index 7f735e76ca73..6cf0870414c7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu); } +static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, + struct dst_entry *encap_dst, + int headroom) +{ + u32 encap_mtu = dst_mtu(encap_dst); + + if (skb->len > encap_mtu - headroom) + skb_dst_update_pmtu(skb, encap_mtu - headroom); +} + #endif /* _NET_DST_H */ diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 482a1b705362..c8e2bebd8d93 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) * 1 if something is broken and should be logged (!!! 
above) * 2 if packet should be dropped */ -static inline int INET_ECN_decapsulate(struct sk_buff *skb, - __u8 outer, __u8 inner) +static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) { if (INET_ECN_is_not_ect(inner)) { switch (outer & INET_ECN_MASK) { @@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb, } } - if (INET_ECN_is_ce(outer)) + *set_ce = INET_ECN_is_ce(outer); + return 0; +} + +static inline int INET_ECN_decapsulate(struct sk_buff *skb, + __u8 outer, __u8 inner) +{ + bool set_ce = false; + int rc; + + rc = __INET_ECN_decapsulate(outer, inner, &set_ce); + if (!rc && set_ce) INET_ECN_set_ce(skb); - return 0; + return rc; } static inline int IP_ECN_decapsulate(const struct iphdr *oiph, diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index caabfd84a098..84097010237c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -159,6 +159,10 @@ struct fib6_info { struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; +#ifdef CONFIG_IPV6_ROUTER_PREF + unsigned long last_probe; +#endif + u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 0e355f4a3d76..77e2761d4f2f 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -99,7 +99,7 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table, void (*iter)(struct flow_offload *flow, void *data), void *data); -void nf_flow_table_cleanup(struct net *net, struct net_device *dev); +void nf_flow_table_cleanup(struct net_device *dev); int nf_flow_table_init(struct nf_flowtable *flow_table); void nf_flow_table_free(struct nf_flowtable *flow_table); diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/net/netfilter/nfnetlink_log.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 86f034b524d4..8dadc74c22e7 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -148,11 +148,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \ a->chunk_hdr->type == SCTP_CID_I_DATA) -/* Calculate the actual data size in a data chunk */ -#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \ - (unsigned long)(c->chunk_hdr) - \ - sctp_datachk_len(&c->asoc->stream))) - /* Internal error codes */ enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 5ef1bad81ef5..9e3d32746430 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sctp_datahdr_len(&chunk->asoc->stream); + size -= sctp_datachk_len(&chunk->asoc->stream); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 28a7c8e44636..a11f93790476 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -876,6 +876,8 @@ struct sctp_transport { unsigned long sackdelay; __u32 sackfreq; + atomic_t mtu_info; + /* When was the last time that we heard from this transport? We use * this to pick new active and retran paths. 
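One pattern in this run of header changes deserves a callout: the inet_ecn.h hunk above splits INET_ECN_decapsulate() into a pure helper, __INET_ECN_decapsulate(), that only computes a verdict and reports "mark CE" through an out-parameter, plus a thin wrapper that actually touches the skb. Callers that have header bits but no skb can then reuse the decision logic. A simplified, self-contained sketch of that decide-then-act shape (ecn_decap_decide() is a hypothetical name, and it collapses the real RFC 6040 table down to the Not-ECT/CE cases; the kernel helper also distinguishes ECT(0)/ECT(1) and a "log" verdict):

#include <assert.h>
#include <stdbool.h>

#define ECN_MASK    0x03
#define ECN_NOT_ECT 0x00
#define ECN_CE      0x03

/* Pure decision, no packet needed: returns 0 = keep, 2 = drop;
 * *set_ce tells the caller whether to mark the inner packet CE. */
static int ecn_decap_decide(unsigned char outer, unsigned char inner,
                            bool *set_ce)
{
    *set_ce = false;
    if ((inner & ECN_MASK) == ECN_NOT_ECT &&
        (outer & ECN_MASK) == ECN_CE)
        return 2; /* congestion mark would be lost: drop */
    *set_ce = (outer & ECN_MASK) == ECN_CE;
    return 0;
}

int main(void)
{
    bool ce;

    assert(ecn_decap_decide(ECN_CE, ECN_NOT_ECT, &ce) == 2);
    assert(ecn_decap_decide(ECN_CE, 0x01, &ce) == 0 && ce);  /* ECT(1) inner */
    assert(ecn_decap_decide(0x00, 0x01, &ce) == 0 && !ce);
    return 0;
}

Keeping the side effect (INET_ECN_set_ce) in the wrapper is what makes the decision logic testable and reusable without constructing packets.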
*/ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d574ce63bf22..881ecb1555bf 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -145,6 +145,10 @@ enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_DEVICE, SWITCHDEV_FDB_DEL_TO_DEVICE, SWITCHDEV_FDB_OFFLOADED, + + SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, + SWITCHDEV_VXLAN_FDB_OFFLOADED, }; struct switchdev_notifier_info { @@ -155,7 +159,8 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; - bool added_by_user; + u8 added_by_user:1, + offloaded:1; }; static inline struct net_device * diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7ef15179f263..03431c148e16 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -5,6 +5,8 @@ #include <linux/if_vlan.h> #include <net/udp_tunnel.h> #include <net/dst_metadata.h> +#include <net/rtnetlink.h> +#include <net/switchdev.h> /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -190,6 +192,7 @@ union vxlan_addr { struct vxlan_rdst { union vxlan_addr remote_ip; __be16 remote_port; + u8 offloaded:1; __be32 remote_vni; u32 remote_ifindex; struct list_head list; @@ -370,4 +373,65 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) return vs->sock->sk->sk_family; } +#if IS_ENABLED(CONFIG_IPV6) + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_any(&ipa->sin6.sin6_addr); + else + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + if (ipa->sa.sa_family == AF_INET6) + return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr); + else + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#else /* !IS_ENABLED(CONFIG_IPV6) */ + +static inline bool vxlan_addr_any(const union vxlan_addr *ipa) +{ + return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY); +} + +static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa) +{ + return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr)); +} + +#endif /* IS_ENABLED(CONFIG_IPV6) */ + +static inline bool netif_is_vxlan(const struct net_device *dev) +{ + return dev->rtnl_link_ops && + !strcmp(dev->rtnl_link_ops->kind, "vxlan"); +} + +struct switchdev_notifier_vxlan_fdb_info { + struct switchdev_notifier_info info; /* must be first */ + union vxlan_addr remote_ip; + __be16 remote_port; + __be32 remote_vni; + u32 remote_ifindex; + u8 eth_addr[ETH_ALEN]; + __be32 vni; + bool offloaded; +}; + +#if IS_ENABLED(CONFIG_VXLAN) +int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info); +#else +static inline int +vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni, + struct switchdev_notifier_vxlan_fdb_info *fdb_info) +{ + return -ENOENT; +} +#endif + #endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5444e76870bb..579974b0bf0d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1511,9 +1511,16 @@ enum nft_flowtable_hook_attributes { }; #define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) +/** + * enum nft_osf_attributes - nftables osf expression netlink attributes + * + * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers) + * @NFTA_OSF_TTL: Value of the TTL osf option 
(NLA_U8) + */ enum nft_osf_attributes { NFTA_OSF_UNSPEC, NFTA_OSF_DREG, + NFTA_OSF_TTL, __NFTA_OSF_MAX, }; #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1) diff --git a/include/uapi/linux/netfilter/xt_quota.h b/include/uapi/linux/netfilter/xt_quota.h index d72fd52adbba..f3ba5d9e58b6 100644 --- a/include/uapi/linux/netfilter/xt_quota.h +++ b/include/uapi/linux/netfilter/xt_quota.h @@ -15,11 +15,9 @@ struct xt_quota_info { __u32 flags; __u32 pad; __aligned_u64 quota; -#ifdef __KERNEL__ - atomic64_t counter; -#else - __aligned_u64 remain; -#endif + + /* Used internally by the kernel */ + struct xt_quota_priv *master; }; #endif /* _XT_QUOTA_H */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index b479db5c71d9..34dd3d497f2c 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -301,6 +301,7 @@ enum sctp_sinfo_flags { SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ /* 2 bits here have been used by SCTP_PR_SCTP_MASK */ SCTP_SENDALL = (1 << 6), + SCTP_PR_SCTP_ALL = (1 << 7), SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index ef0b7b6ef8a5..686d244e798d 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, sock_hold(sock->sk); old_xs = xchg(&m->xsk_map[i], xs); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } sockfd_put(sock); return 0; @@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return -EINVAL; old_xs = xchg(&m->xsk_map[k], NULL); - if (old_xs) { - /* Make sure we've flushed everything. */ - synchronize_net(); + if (old_xs) sock_put((struct sock *)old_xs); - } return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7fc4a371bdd2..908c9cdae2f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4001,7 +4001,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * put back on, and if we advance min_vruntime, we'll be placed back * further than we started -- ie. we'll be penalized. */ - if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE) + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); } @@ -4476,9 +4476,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is + * not running add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -4622,14 +4626,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. 
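The sched/fair.c hunks above all serve one pattern: cfs_b->lock cannot be held across distribute_cfs_runtime(), so a plain flag, only ever read and written under the lock, marks that a distribution is in flight and keeps the period timer and the slack timer from distributing concurrently. A condensed sketch (names follow the hunks; control flow is simplified for illustration):

	raw_spin_lock(&cfs_b->lock);
	if (!cfs_b->distribute_running && cfs_b->runtime > 0) {
		u64 runtime = cfs_b->runtime;

		cfs_b->distribute_running = 1;
		raw_spin_unlock(&cfs_b->lock);	/* can't nest cfs_b->lock */

		runtime = distribute_cfs_runtime(cfs_b, runtime,
						 runtime_expires);

		raw_spin_lock(&cfs_b->lock);
		cfs_b->distribute_running = 0;
		cfs_b->runtime -= min(runtime, cfs_b->runtime);
	}
	raw_spin_unlock(&cfs_b->lock);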
*/ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -4740,6 +4746,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -4749,6 +4760,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -4759,6 +4773,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4867,6 +4882,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 455fa330de04..9683f458aec7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -346,6 +346,8 @@ struct cfs_bandwidth { int nr_periods; int nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index f704390db9fc..d8765c952fab 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -5,12 +5,12 @@ * Copyright (C) 2018 Joel Fernandes (Google) <joel@joelfernandes.org> */ +#include <linux/trace_clock.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kernel.h> #include <linux/kthread.h> -#include <linux/ktime.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/string.h> @@ -25,13 +25,13 @@ MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt or irq (default ir static void busy_wait(ulong time) { - ktime_t start, end; - start = ktime_get(); + u64 start, end; + start = trace_clock_local(); do { - end = ktime_get(); + end = trace_clock_local(); if (kthread_should_stop()) break; - } while (ktime_to_ns(ktime_sub(end, start)) < (time * 1000)); + } while ((end - start) < (time * 1000)); } static int preemptirq_delay_run(void *data) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 85f6b01431c7..d239004aaf29 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -738,16 +738,30 @@ static void free_synth_field(struct synth_field *field) kfree(field); } -static struct synth_field *parse_synth_field(char *field_type, - char *field_name) +static struct synth_field *parse_synth_field(int argc, char **argv, + int *consumed) { struct synth_field *field; + const char 
*prefix = NULL; + char *field_type = argv[0], *field_name; int len, ret = 0; char *array; if (field_type[0] == ';') field_type++; + if (!strcmp(field_type, "unsigned")) { + if (argc < 3) + return ERR_PTR(-EINVAL); + prefix = "unsigned "; + field_type = argv[1]; + field_name = argv[2]; + *consumed = 3; + } else { + field_name = argv[1]; + *consumed = 2; + } + len = strlen(field_name); if (field_name[len - 1] == ';') field_name[len - 1] = '\0'; @@ -760,11 +774,15 @@ static struct synth_field *parse_synth_field(char *field_type, array = strchr(field_name, '['); if (array) len += strlen(array); + if (prefix) + len += strlen(prefix); field->type = kzalloc(len, GFP_KERNEL); if (!field->type) { ret = -ENOMEM; goto free; } + if (prefix) + strcat(field->type, prefix); strcat(field->type, field_type); if (array) { strcat(field->type, array); @@ -1009,7 +1027,7 @@ static int create_synth_event(int argc, char **argv) struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; struct synth_event *event = NULL; bool delete_event = false; - int i, n_fields = 0, ret = 0; + int i, consumed = 0, n_fields = 0, ret = 0; char *name; mutex_lock(&synth_event_mutex); @@ -1061,16 +1079,16 @@ static int create_synth_event(int argc, char **argv) goto err; } - field = parse_synth_field(argv[i], argv[i + 1]); + field = parse_synth_field(argc - i, &argv[i], &consumed); if (IS_ERR(field)) { ret = PTR_ERR(field); goto err; } - fields[n_fields] = field; - i++; n_fields++; + fields[n_fields++] = field; + i += consumed - 1; } - if (i < argc) { + if (i < argc && strcmp(argv[i], ";") != 0) { ret = -EINVAL; goto err; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index bf2c06ef9afc..a3be42304485 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,8 +28,8 @@ #include <linux/sched/task.h> #include <linux/static_key.h> -extern struct tracepoint * const __start___tracepoints_ptrs[]; -extern struct tracepoint * const __stop___tracepoints_ptrs[]; +extern tracepoint_ptr_t __start___tracepoints_ptrs[]; +extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); @@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); -static void for_each_tracepoint_range(struct tracepoint * const *begin, - struct tracepoint * const *end, +static void for_each_tracepoint_range( + tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { + tracepoint_ptr_t *iter; + if (!begin) return; - - if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { - const int *iter; - - for (iter = (const int *)begin; iter < (const int *)end; iter++) - fct(offset_to_ptr(iter), priv); - } else { - struct tracepoint * const *iter; - - for (iter = begin; iter < end; iter++) - fct(*iter, priv); - } + for (iter = begin; iter < end; iter++) + fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES diff --git a/lib/test_ida.c b/lib/test_ida.c index 2d1637d8136b..b06880625961 100644 --- a/lib/test_ida.c +++ b/lib/test_ida.c @@ -150,10 +150,10 @@ static void ida_check_conv(struct ida *ida) IDA_BUG_ON(ida, !ida_is_empty(ida)); } +static DEFINE_IDA(ida); + static int ida_checks(void) { - DEFINE_IDA(ida); - IDA_BUG_ON(&ida, !ida_is_empty(&ida)); ida_check_alloc(&ida); ida_check_destroy(&ida); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 00704060b7f7..deed97fba979 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1780,7 +1780,7 @@ static 
pmd_t move_soft_dirty_pmd(pmd_t pmd) bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) + pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; @@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); - if (pmd_present(pmd) && pmd_dirty(pmd)) + if (pmd_present(pmd)) force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, } pmd = move_soft_dirty_pmd(pmd); set_pmd_at(mm, new_addr, new_pmd, pmd); - if (new_ptl != old_ptl) - spin_unlock(new_ptl); if (force_flush) flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); - else - *need_flush = true; + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); return true; } @@ -2885,9 +2883,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, if (!(pvmw->pmd && !pvmw->pte)) return; - mmu_notifier_invalidate_range_start(mm, address, - address + HPAGE_PMD_SIZE); - flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); pmdval = *pvmw->pmd; pmdp_invalidate(vma, address, pvmw->pmd); @@ -2900,9 +2895,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, set_pmd_at(mm, address, pvmw->pmd, pmdswp); page_remove_rmap(page, true); put_page(page); - - mmu_notifier_invalidate_range_end(mm, address, - address + HPAGE_PMD_SIZE); } void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) diff --git a/mm/mmap.c b/mm/mmap.c index 5f2b2b184c60..f7cd9cb966c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (flags & MAP_FIXED_NOREPLACE) { struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && vma->vm_start <= addr) + if (vma && vma->vm_start < addr + len) return -EEXIST; } diff --git a/mm/mremap.c b/mm/mremap.c index 5c2e18505f75..a9617e72e6b7 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks, bool *need_flush) + unsigned long new_addr, bool need_rmap_locks) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; @@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte = ptep_get_and_clear(mm, old_addr, old_pte); /* - * If we are remapping a dirty PTE, make sure + * If we are remapping a valid PTE, make sure * to flush TLB before we drop the PTL for the - * old PTE or we may race with page_mkclean(). + * PTE. * - * This check has to be done after we removed the - * old PTE from page tables or another thread may - * dirty it after the check and before the removal. + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. 
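The ordering that both the huge_memory.c and mremap.c hunks converge on, shown in isolation (illustrative; these lines mirror the move_ptes() tail above):

	arch_leave_lazy_mmu_mode();
	/* Flush while both PTLs are still held: the old mapping must
	 * become unreachable before any other thread can observe, free
	 * or reuse the page through the new one.
	 */
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);

With the flush after spin_unlock(new_ptl), another thread could take new_ptl, act on the moved PTE, and a third CPU could still write through a stale TLB entry for old_addr; holding both locks across the flush closes that window.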
*/ - if (pte_present(pte) && pte_dirty(pte)) + if (pte_present(pte)) force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); - if (force_flush) - flush_tlb_range(vma, old_end - len, old_end); - else - *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd, - &need_flush); + old_end, old_pmd, new_pmd); if (need_rmap_locks) drop_rmap_locks(vma); if (moved) @@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (extent > next - new_addr) extent = next - new_addr; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, - new_pmd, new_addr, need_rmap_locks, &need_flush); + new_pmd, new_addr, need_rmap_locks); } - if (need_flush) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 546af0e73ac3..ff720f1ebf73 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -756,8 +756,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) return; vlan->netpoll = NULL; - - __netpoll_free_async(netpoll); + __netpoll_free(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f47f8fad757a..ef9928d7b4fb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4937,31 +4937,27 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); - if (!status) { - /* The remote features procedure is defined for master - * role only. So only in case of an initiated connection - * request the remote features. - * - * If the local controller supports slave-initiated features - * exchange, then requesting the remote features in slave - * role is possible. Otherwise just transition into the - * connected state without requesting the remote features. - */ - if (conn->out || - (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) { - struct hci_cp_le_read_remote_features cp; + /* The remote features procedure is defined for master + * role only. So only in case of an initiated connection + * request the remote features. + * + * If the local controller supports slave-initiated features + * exchange, then requesting the remote features in slave + * role is possible. Otherwise just transition into the + * connected state without requesting the remote features. 
+ */ + if (conn->out || + (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) { + struct hci_cp_le_read_remote_features cp; - cp.handle = __cpu_to_le16(conn->handle); + cp.handle = __cpu_to_le16(conn->handle); - hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES, - sizeof(cp), &cp); + hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES, + sizeof(cp), &cp); - hci_conn_hold(conn); - } else { - conn->state = BT_CONNECTED; - hci_connect_cfm(conn, status); - } + hci_conn_hold(conn); } else { + conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 514899f7f0d4..2146e0f3b6f8 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -680,9 +680,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) u16 result; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_AUTHORIZATION; + result = L2CAP_CR_LE_AUTHORIZATION; else - result = L2CAP_CR_BAD_PSM; + result = L2CAP_CR_LE_BAD_PSM; l2cap_state_change(chan, BT_DISCONN); @@ -3670,7 +3670,7 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) rsp.mtu = cpu_to_le16(chan->imtu); rsp.mps = cpu_to_le16(chan->mps); rsp.credits = cpu_to_le16(chan->rx_credits); - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), &rsp); @@ -3816,9 +3816,17 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, result = L2CAP_CR_NO_MEM; + /* Check for valid dynamic CID range (as per Erratum 3253) */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_DYN_END) { + result = L2CAP_CR_INVALID_SCID; + goto response; + } + /* Check if we already have channel with that dcid */ - if (__l2cap_get_chan_by_dcid(conn, scid)) + if (__l2cap_get_chan_by_dcid(conn, scid)) { + result = L2CAP_CR_SCID_IN_USE; goto response; + } chan = pchan->ops->new_connection(pchan); if (!chan) @@ -5280,7 +5288,7 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); - if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 || + if (result == L2CAP_CR_LE_SUCCESS && (mtu < 23 || mps < 23 || dcid < L2CAP_CID_DYN_START || dcid > L2CAP_CID_LE_DYN_END)) return -EPROTO; @@ -5301,7 +5309,7 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_lock(chan); switch (result) { - case L2CAP_CR_SUCCESS: + case L2CAP_CR_LE_SUCCESS: if (__l2cap_get_chan_by_dcid(conn, dcid)) { err = -EBADSLT; break; @@ -5315,8 +5323,8 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_ready(chan); break; - case L2CAP_CR_AUTHENTICATION: - case L2CAP_CR_ENCRYPTION: + case L2CAP_CR_LE_AUTHENTICATION: + case L2CAP_CR_LE_ENCRYPTION: /* If we already have MITM protection we can't do * anything. 
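To see why the dedicated L2CAP_CR_LE_* space above matters: values 0x0005 and up mean different things in BR/EDR connect responses (BAD_AMP, INVALID_SCID, SCID_IN_USE) than in LE credit-based ones (AUTHENTICATION, AUTHORIZATION, ...), so the shared constants were ambiguous. A hypothetical helper distilling the checks l2cap_le_connect_req() performs, expressed purely in the new LE result space:

static u16 le_connect_result(struct l2cap_conn *conn,
			     struct l2cap_chan *pchan, u16 scid, bool sec_ok)
{
	if (!pchan)
		return L2CAP_CR_LE_BAD_PSM;
	if (!sec_ok)
		return L2CAP_CR_LE_AUTHENTICATION;
	if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END)
		return L2CAP_CR_LE_INVALID_SCID;
	if (__l2cap_get_chan_by_dcid(conn, scid))
		return L2CAP_CR_LE_SCID_IN_USE;
	return L2CAP_CR_LE_SUCCESS;
}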
*/ @@ -5459,7 +5467,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); if (!pchan) { - result = L2CAP_CR_BAD_PSM; + result = L2CAP_CR_LE_BAD_PSM; chan = NULL; goto response; } @@ -5469,28 +5477,28 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_AUTHENTICATION; + result = L2CAP_CR_LE_AUTHENTICATION; chan = NULL; goto response_unlock; } /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { - result = L2CAP_CR_INVALID_SCID; + result = L2CAP_CR_LE_INVALID_SCID; chan = NULL; goto response_unlock; } /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - result = L2CAP_CR_SCID_IN_USE; + result = L2CAP_CR_LE_SCID_IN_USE; chan = NULL; goto response_unlock; } chan = pchan->ops->new_connection(pchan); if (!chan) { - result = L2CAP_CR_NO_MEM; + result = L2CAP_CR_LE_NO_MEM; goto response_unlock; } @@ -5526,7 +5534,7 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, chan->ops->defer(chan); } else { l2cap_chan_ready(chan); - result = L2CAP_CR_SUCCESS; + result = L2CAP_CR_LE_SUCCESS; } response_unlock: diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index b64e1649993b..94e88f510c5b 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -23,9 +23,11 @@ static void shutdown_umh(struct umh_info *info) if (!info->pid) return; - tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); - if (tsk) + tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); + if (tsk) { force_sig(SIGKILL, tsk); + put_task_struct(tsk); + } fput(info->pipe_to_umh); fput(info->pipe_from_umh); info->pid = 0; diff --git a/net/bridge/br.c b/net/bridge/br.c index e411e40333e2..360ad66c21e9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -151,7 +151,7 @@ static int br_switchdev_event(struct notifier_block *unused, break; } br_fdb_offloaded_set(br, p, fdb_info->addr, - fdb_info->vid); + fdb_info->vid, true); break; case SWITCHDEV_FDB_DEL_TO_BRIDGE: fdb_info = ptr; @@ -163,7 +163,7 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_OFFLOADED: fdb_info = ptr; br_fdb_offloaded_set(br, p, fdb_info->addr, - fdb_info->vid); + fdb_info->vid, fdb_info->offloaded); break; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index e053a4e43758..c6abf927f0c9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -344,7 +344,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - __netpoll_free_async(np); + __netpoll_free(np); } #endif diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 74331690a390..e56ba3912a90 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1152,7 +1152,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, } void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, bool offloaded) { struct net_bridge_fdb_entry *fdb; @@ -1160,7 +1160,7 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, fdb = br_fdb_find(br, addr, vid); if (fdb) - fdb->offloaded = 1; + fdb->offloaded = offloaded; spin_unlock_bh(&br->hash_lock); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 10ee39fdca5c..2920e06a5403 100644 --- 
a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -574,7 +574,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid); + const unsigned char *addr, u16 vid, bool offloaded); /* br_forward.c */ enum br_pkt_type { diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index d77f807420c4..b993df770675 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -103,7 +103,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, static void br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, u16 vid, struct net_device *dev, - bool added_by_user) + bool added_by_user, bool offloaded) { struct switchdev_notifier_fdb_info info; unsigned long notifier_type; @@ -111,6 +111,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, info.addr = mac; info.vid = vid; info.added_by_user = added_by_user; + info.offloaded = offloaded; notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; call_switchdev_notifiers(notifier_type, dev, &info.info); } @@ -126,13 +127,15 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, fdb->key.vlan_id, fdb->dst->dev, - fdb->added_by_user); + fdb->added_by_user, + fdb->offloaded); break; case RTM_NEWNEIGH: br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, fdb->key.vlan_id, fdb->dst->dev, - fdb->added_by_user); + fdb->added_by_user, + fdb->offloaded); break; } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4cc603dfc9ef..d05402868575 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -928,6 +928,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, return -EINVAL; } + if (info.cmd != cmd) + return -EINVAL; + if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) @@ -2392,13 +2395,17 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) +static int ethtool_set_per_queue(struct net_device *dev, + void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) return -EFAULT; + if (per_queue_opt.sub_command != sub_cmd) + return -EINVAL; + switch (per_queue_opt.sub_command) { case ETHTOOL_GCOALESCE: return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); @@ -2769,7 +2776,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_phy_stats(dev, useraddr); break; case ETHTOOL_PERQUEUE: - rc = ethtool_set_per_queue(dev, useraddr); + rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); break; case ETHTOOL_GLINKSETTINGS: rc = ethtool_get_link_ksettings(dev, useraddr); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 69c41cb3966d..ee605d9d8bd4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1167,8 +1167,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->nud_state = new; err = 0; notify = old & NUD_VALID; - if (((old & (NUD_INCOMPLETE | NUD_PROBE)) || - (flags & NEIGH_UPDATE_F_ADMIN)) && + if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && (new & NUD_FAILED)) { neigh_invalidate(neigh); notify = 1; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 
de1d1ba92f2d..5da9552b186b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -57,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu); MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -312,7 +311,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; - rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -357,7 +355,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); @@ -591,7 +588,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); - INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", @@ -790,10 +786,6 @@ void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; - /* rtnl_dereference would be preferable here but - * rcu_cleanup_netpoll path can put us in here safely without - * holding the rtnl, so plain rcu_dereference it is - */ npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; @@ -814,21 +806,16 @@ void __netpoll_cleanup(struct netpoll *np) } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -static void netpoll_async_cleanup(struct work_struct *work) +void __netpoll_free(struct netpoll *np) { - struct netpoll *np = container_of(work, struct netpoll, cleanup_work); + ASSERT_RTNL(); - rtnl_lock(); + /* Wait for transmitting packets to finish before freeing. 
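For context on the netpoll conversion above: cleanup used to be punted to a workqueue via __netpoll_free_async(); it is now synchronous and relies on the caller already holding RTNL. The caller pattern, as the vlan, bridge and dsa hunks in this series use it ("priv" stands for the driver's own private struct and is illustrative):

	struct netpoll *np = priv->netpoll;

	if (!np)
		return;
	priv->netpoll = NULL;
	__netpoll_free(np);	/* under RTNL: waits for tx, cleans up, kfrees */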
*/ + synchronize_rcu_bh(); __netpoll_cleanup(np); - rtnl_unlock(); kfree(np); } - -void __netpoll_free_async(struct netpoll *np) -{ - schedule_work(&np->cleanup_work); -} -EXPORT_SYMBOL_GPL(__netpoll_free_async); +EXPORT_SYMBOL_GPL(__netpoll_free); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 54b961de9538..946de0e24c87 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1846,8 +1846,9 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; - skb->csum = csum_sub(skb->csum, - skb_checksum(skb, len, delta, 0)); + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); } return __pskb_trim(skb, len); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 3f840b6eea69..7d0c19e7edcf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -722,7 +722,7 @@ static void dsa_slave_netpoll_cleanup(struct net_device *dev) p->netpoll = NULL; - __netpoll_free_async(netpoll); + __netpoll_free(netpoll); } static void dsa_slave_poll_controller(struct net_device *dev) @@ -1478,6 +1478,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) netdev_dbg(dev, "fdb add failed err=%d\n", err); break; } + fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, &fdb_info->info); break; diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 844806120f44..3e614cc824f7 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -315,8 +315,6 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, next_entry: e++; } - e = 0; - s_e = 0; spin_lock_bh(lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index ac110c1d55b5..a0aa13bcabda 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -60,6 +60,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); MODULE_ALIAS("ip_nat_snmp_basic"); +MODULE_ALIAS_NFCT_HELPER("snmp_trap"); #define SNMP_PORT 161 #define SNMP_TRAP_PORT 162 diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index b88081285fd1..9277abdd822a 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) return cwnd; } +/* With pacing at lower layers, there's often less data "in the network" than + * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), + * we often have several skbs queued in the pacing layer with a pre-scheduled + * earliest departure time (EDT). BBR adapts its pacing rate based on the + * inflight level that it estimates has already been "baked in" by previous + * departure time decisions. We calculate a rough estimate of the number of our + * packets that might be in the network at the earliest departure time for the + * next skb scheduled: + * in_network_at_edt = inflight_at_edt - (EDT - now) * bw + * If we're increasing inflight, then we want to know if the transmit of the + * EDT skb will push inflight above the target, so inflight_at_edt includes + * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, + * then estimate if inflight will sink too low just before the EDT transmit. 
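A worked instance of the estimate described above, with purely illustrative numbers:

/* bw = 100 pkts/ms, EDT is 2 ms ahead, inflight_now = 300 packets,
 * pacing_gain > BBR_UNIT so the departing skb's ~10 segs are included:
 *
 *   inflight_at_edt    = 300 + 10 = 310
 *   interval_delivered = 100 * 2  = 200
 *   in_network_at_edt  = 310 - 200 = 110 packets
 *
 * Had interval_delivered reached 310 or more, the estimate would clamp
 * to 0, exactly as the early return in the function below does.
 */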
+ */ +static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 now_ns, edt_ns, interval_us; + u32 interval_delivered, inflight_at_edt; + + now_ns = tp->tcp_clock_cache; + edt_ns = max(tp->tcp_wstamp_ns, now_ns); + interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); + interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; + inflight_at_edt = inflight_now; + if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ + inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ + if (interval_delivered >= inflight_at_edt) + return 0; + return inflight_at_edt - interval_delivered; +} + /* An optimization in BBR to reduce losses: On the first round of recovery, we * follow the packet conservation principle: send P packets per P packets acked. * After that, we slow-start and send at most 2*P packets per P packets acked. @@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, if (bbr->pacing_gain == BBR_UNIT) return is_full_length; /* just use wall clock time */ - inflight = rs->prior_in_flight; /* what was in-flight before ACK? */ + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); bw = bbr_max_bw(sk); /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at @@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk) bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; - bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : - bbr_pacing_gain[bbr->cycle_idx]; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ @@ -507,8 +538,6 @@ static void bbr_reset_startup_mode(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_STARTUP; - bbr->pacing_gain = bbr_high_gain; - bbr->cwnd_gain = bbr_high_gain; } static void bbr_reset_probe_bw_mode(struct sock *sk) @@ -516,8 +545,6 @@ static void bbr_reset_probe_bw_mode(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_PROBE_BW; - bbr->pacing_gain = BBR_UNIT; - bbr->cwnd_gain = bbr_cwnd_gain; bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ } @@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ - bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ - bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ tcp_sk(sk)->snd_ssthresh = bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && - tcp_packets_in_flight(tcp_sk(sk)) <= + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } @@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) if (bbr_probe_rtt_mode_ms > 0 && filter_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ - bbr->pacing_gain = BBR_UNIT; - bbr->cwnd_gain = BBR_UNIT; bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; } @@ -827,6 +850,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) bbr->idle_restart = 0; } +static void bbr_update_gains(struct sock *sk) +{ + struct 
bbr *bbr = inet_csk_ca(sk); + + switch (bbr->mode) { + case BBR_STARTUP: + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; + break; + case BBR_DRAIN: + bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ + bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ + break; + case BBR_PROBE_BW: + bbr->pacing_gain = (bbr->lt_use_bw ? + BBR_UNIT : + bbr_pacing_gain[bbr->cycle_idx]); + bbr->cwnd_gain = bbr_cwnd_gain; + break; + case BBR_PROBE_RTT: + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + break; + default: + WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); + break; + } +} + static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) { bbr_update_bw(sk, rs); @@ -834,6 +886,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) bbr_check_full_bw_reached(sk, rs); bbr_check_drain(sk, rs); bbr_update_min_rtt(sk, rs); + bbr_update_gains(sk); } static void bbr_main(struct sock *sk, const struct rate_sample *rs) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d212e4cbc689..c07990a35ff3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2321,18 +2321,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; + if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { + /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ + skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; + list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); + goto repair; /* Skip network transmission */ + } + if (tcp_pacing_check(sk)) break; tso_segs = tcp_init_tso_segs(skb, mss_now); BUG_ON(!tso_segs); - if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { - /* "skb_mstamp" is used as a start point for the retransmit timer */ - tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns); - goto repair; /* Skip network transmission */ - } - cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { if (push_one == 2) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index a9162aa11af9..95df7f7f6328 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -99,8 +99,10 @@ void tcp_cleanup_ulp(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - sock_owned_by_me(sk); - + /* No sock_owned_by_me() check here as at the time the + * stack calls this function, the socket is dead and + * about to be destroyed. + */ if (!icsk->icsk_ulp_ops) return; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2496b12bf721..e39c284e2954 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4972,12 +4972,14 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. 
temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a0b6932c3afd..a9d06d4dd057 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1184,11 +1184,6 @@ route_lookup: } skb_dst_set(skb, dst); - if (encap_limit >= 0) { - init_tel_txopt(&opt, encap_limit); - ipv6_push_frag_opts(skb, &opt.ops, &proto); - } - if (hop_limit == 0) { if (skb->protocol == htons(ETH_P_IP)) hop_limit = ip_hdr(skb)->ttl; @@ -1210,6 +1205,11 @@ route_lookup: if (err) return err; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_frag_opts(skb, &opt.ops, &proto); + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6895e1dc0b03..21f6deb2aec9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9fd600e42f9d..e3226284e480 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -517,10 +517,11 @@ static void rt6_probe_deferred(struct work_struct *w) static void rt6_probe(struct fib6_info *rt) { - struct __rt6_probe_work *work; + struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; struct neighbour *neigh; struct net_device *dev; + struct inet6_dev *idev; /* * Okay, this does not seem to be appropriate @@ -536,15 +537,12 @@ static void rt6_probe(struct fib6_info *rt) nh_gw = &rt->fib6_nh.nh_gw; dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); + idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { - struct inet6_dev *idev; - if (neigh->nud_state & NUD_VALID) goto out; - idev = __in6_dev_get(dev); - work = NULL; write_lock(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, @@ -554,11 +552,13 @@ static void rt6_probe(struct fib6_info *rt) __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else { + } else if (time_after(jiffies, rt->last_probe + + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } if (work) { + rt->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 374e7d302f26..06d17ff3562f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -766,11 +766,9 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, ret = udpv6_queue_rcv_skb(sk, skb); 
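Stepping back to the rt6_probe() hunk above: the new fib6_info::last_probe field closes a hole where a missing neighbour entry meant probes were not rate-limited at all. A condensed sketch of the allocation path (names from the hunk; the surrounding locking is omitted):

	if (!neigh &&
	    time_after(jiffies, rt->last_probe + idev->cnf.rtr_probe_interval))
		work = kmalloc(sizeof(*work), GFP_ATOMIC);

	if (work) {
		rt->last_probe = jiffies;	/* stamp only when a probe is queued */
		INIT_WORK(&work->work, rt6_probe_deferred);
	}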
- /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef3defaf43b9..d35bcf92969c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -146,8 +146,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - while (nh + offset + 1 < skb->data || - pskb_may_pull(skb, nh + offset + 1 - skb->data)) { + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { nh = skb_network_header(skb); exthdr = (struct ipv6_opt_hdr *)(nh + offset); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0ac522b48a1..4ff89cb7c86f 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk) llc_sk(sk)->sap = sap; spin_lock_bh(&sap->sk_lock); + sock_set_flag(sk, SOCK_RCU_FREE); sap->sk_count++; sk_nulls_add_node_rcu(sk, laddr_hb); hlist_add_head(&llc->dev_hash_node, dev_hb); diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig index 08a8a6031fd7..7f2b46108a24 100644 --- a/net/ncsi/Kconfig +++ b/net/ncsi/Kconfig @@ -10,3 +10,9 @@ config NET_NCSI support. Enable this only if your system connects to a network device via NCSI and the ethernet driver you're using supports the protocol explicitly. +config NCSI_OEM_CMD_GET_MAC + bool "Get NCSI OEM MAC Address" + depends on NET_NCSI + ---help--- + This allows to get MAC address from NCSI firmware and set them back to + controller. diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 13c9b5eeb3b7..1dae77c54009 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -71,6 +71,13 @@ enum { /* OEM Vendor Manufacture ID */ #define NCSI_OEM_MFR_MLX_ID 0x8119 #define NCSI_OEM_MFR_BCM_ID 0x113d +/* Broadcom specific OEM Command */ +#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */ +/* OEM Command payload lengths*/ +#define NCSI_OEM_BCM_CMD_GMA_LEN 12 +/* Mac address offset in OEM response */ +#define BCM_MAC_ADDR_OFFSET 28 + struct ncsi_channel_version { u32 version; /* Supported BCD encoded NCSI version */ @@ -246,6 +253,7 @@ enum { ncsi_dev_state_probe_dp, ncsi_dev_state_config_sp = 0x0301, ncsi_dev_state_config_cis, + ncsi_dev_state_config_oem_gma, ncsi_dev_state_config_clear_vids, ncsi_dev_state_config_svf, ncsi_dev_state_config_ev, @@ -279,6 +287,7 @@ struct ncsi_dev_priv { #define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */ #define NCSI_DEV_HWA 2 /* Enabled HW arbitration */ #define NCSI_DEV_RESHUFFLE 4 + unsigned int gma_flag; /* OEM GMA flag */ spinlock_t lock; /* Protect the NCSI device */ #if IS_ENABLED(CONFIG_IPV6) unsigned int inet6_addr_num; /* Number of IPv6 addresses */ diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 6aa0614d2d28..bfc43b28c7a6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -651,6 +651,72 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } +#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + +/* NCSI OEM Command APIs */ +static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) +{ + unsigned char data[NCSI_OEM_BCM_CMD_GMA_LEN]; + int ret = 0; + + nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN; + + memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN); + *(unsigned int 
*)data = ntohl(NCSI_OEM_MFR_BCM_ID); + data[5] = NCSI_OEM_BCM_CMD_GMA; + + nca->data = data; + + ret = ncsi_xmit_cmd(nca); + if (ret) + netdev_err(nca->ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during configure\n", + nca->type); + return ret; +} + +/* OEM Command handlers initialization */ +static struct ncsi_oem_gma_handler { + unsigned int mfr_id; + int (*handler)(struct ncsi_cmd_arg *nca); +} ncsi_oem_gma_handlers[] = { + { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm } +}; + +static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) +{ + struct ncsi_oem_gma_handler *nch = NULL; + int i; + + /* This function should only be called once, return if flag set */ + if (nca->ndp->gma_flag == 1) + return -1; + + /* Find gma handler for given manufacturer id */ + for (i = 0; i < ARRAY_SIZE(ncsi_oem_gma_handlers); i++) { + if (ncsi_oem_gma_handlers[i].mfr_id == mf_id) { + if (ncsi_oem_gma_handlers[i].handler) + nch = &ncsi_oem_gma_handlers[i]; + break; + } + } + + if (!nch) { + netdev_err(nca->ndp->ndev.dev, + "NCSI: No GMA handler available for MFR-ID (0x%x)\n", + mf_id); + return -1; + } + + /* Set the flag for GMA command which should only be called once */ + nca->ndp->gma_flag = 1; + + /* Get Mac address from NCSI device */ + return nch->handler(nca); +} + +#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ + static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; @@ -701,7 +767,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; } + nd->state = ncsi_dev_state_config_oem_gma; + break; + case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; + ret = -1; + +#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + nca.type = NCSI_PKT_CMD_OEM; + nca.package = np->id; + nca.channel = nc->id; + ndp->pending_req_num = 1; + ret = ncsi_gma_handler(&nca, nc->version.mf_id); +#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ + + if (ret < 0) + schedule_work(&ndp->work); + break; case ncsi_dev_state_config_clear_vids: case ncsi_dev_state_config_svf: diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 0f2087c8d42a..4d3f06be38bd 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -165,6 +165,14 @@ struct ncsi_rsp_oem_pkt { unsigned char data[]; /* Payload data */ }; +/* Broadcom Response Data */ +struct ncsi_rsp_oem_bcm_pkt { + unsigned char ver; /* Payload Version */ + unsigned char type; /* OEM Command type */ + __be16 len; /* Payload Length */ + unsigned char data[]; /* Cmd specific Data */ +}; + /* Get Link Status */ struct ncsi_rsp_gls_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 85fa59afae34..77e07ba3f493 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -611,19 +611,59 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) return 0; } +/* Response handler for Broadcom command Get Mac Address */ +static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct net_device *ndev = ndp->ndev.dev; + const struct net_device_ops *ops = ndev->netdev_ops; + struct ncsi_rsp_oem_pkt *rsp; + struct sockaddr saddr; + int ret = 0; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + + saddr.sa_family = ndev->type; + ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); + /* Increase mac address by 1 for BMC's address */ + saddr.sa_data[ETH_ALEN - 1]++; + ret = 
ops->ndo_set_mac_address(ndev, &saddr); + if (ret < 0) + netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); + + return ret; +} + +/* Response handler for Broadcom card */ +static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) +{ + struct ncsi_rsp_oem_bcm_pkt *bcm; + struct ncsi_rsp_oem_pkt *rsp; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data); + + if (bcm->type == NCSI_OEM_BCM_CMD_GMA) + return ncsi_rsp_handler_oem_bcm_gma(nr); + return 0; +} + static struct ncsi_rsp_oem_handler { unsigned int mfr_id; int (*handler)(struct ncsi_request *nr); } ncsi_rsp_oem_handlers[] = { { NCSI_OEM_MFR_MLX_ID, NULL }, - { NCSI_OEM_MFR_BCM_ID, NULL } + { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm } }; /* Response handler for OEM command */ static int ncsi_rsp_handler_oem(struct ncsi_request *nr) { - struct ncsi_rsp_oem_pkt *rsp; struct ncsi_rsp_oem_handler *nrh = NULL; + struct ncsi_rsp_oem_pkt *rsp; unsigned int mfr_id, i; /* Get the response header */ diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 8a33dac4e805..e287da68d5fa 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -15,7 +15,7 @@ #define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) #define ipset_dereference_protected(p, set) \ - __ipset_dereference_protected(p, spin_is_locked(&(set)->lock)) + __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock)) #define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 185c633b6872..a3cc2ef8a48a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -483,7 +483,7 @@ static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, flush_delayed_work(&flowtable->gc_work); } -void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +void nf_flow_table_cleanup(struct net_device *dev) { struct nf_flowtable *flowtable; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index b48545b84ce8..e7a50af1b3d6 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -350,7 +350,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, struct netlink_ext_ack *extack) { const struct nf_conntrack_l4proto *l4proto; - __u16 l3num; __u8 l4num; int ret; @@ -359,7 +358,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, !cda[CTA_TIMEOUT_DATA]) return -EINVAL; - l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find_get(l4num); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 00db27dfd2ff..6f41dd74729d 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -30,32 +30,27 @@ EXPORT_SYMBOL_GPL(nf_osf_fingers); static inline int nf_osf_ttl(const struct sk_buff *skb, int ttl_check, unsigned char f_ttl) { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); const struct iphdr *ip = ip_hdr(skb); - - if (ttl_check != -1) { - if (ttl_check == NF_OSF_TTL_TRUE) - return ip->ttl == f_ttl; - if (ttl_check == NF_OSF_TTL_NOCHECK) - return 1; - else if (ip->ttl <= f_ttl) - return 1; - else { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); - int ret = 0; - - for_ifa(in_dev) { - if 
(inet_ifa_match(ip->saddr, ifa)) { - ret = (ip->ttl == f_ttl); - break; - } - } - endfor_ifa(in_dev); - - return ret; + int ret = 0; + + if (ttl_check == NF_OSF_TTL_TRUE) + return ip->ttl == f_ttl; + if (ttl_check == NF_OSF_TTL_NOCHECK) + return 1; + else if (ip->ttl <= f_ttl) + return 1; + + for_ifa(in_dev) { + if (inet_ifa_match(ip->saddr, ifa)) { + ret = (ip->ttl == f_ttl); + break; } } - return ip->ttl == f_ttl; + endfor_ifa(in_dev); + + return ret; } struct nf_osf_hdr_ctx { @@ -213,7 +208,7 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, if (!tcp) return false; - ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1; + ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { @@ -257,7 +252,8 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, EXPORT_SYMBOL_GPL(nf_osf_match); const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers) + const struct list_head *nf_osf_fingers, + const int ttl_check) { const struct iphdr *ip = ip_hdr(skb); const struct nf_osf_user_finger *f; @@ -275,7 +271,7 @@ const char *nf_osf_find(const struct sk_buff *skb, list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; - if (!nf_osf_match_one(skb, f, -1, &ctx)) + if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) continue; genre = f->genre; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 2cc1e0ef56e8..15cc62b293d6 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -46,8 +46,6 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, return nft_validate_register_load(priv->sreg_dev, sizeof(int)); } -static const struct nft_expr_ops nft_dup_netdev_ingress_ops; - static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_netdev *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index d6bab8c3cbb0..e82d9a966c45 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -201,7 +201,7 @@ static int flow_offload_netdev_event(struct notifier_block *this, if (event != NETDEV_DOWN) return NOTIFY_DONE; - nf_flow_table_cleanup(dev_net(dev), dev); + nf_flow_table_cleanup(dev); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 8abb9891cdf2..d7694e7255a0 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -53,8 +53,6 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, return nft_validate_register_load(priv->sreg_dev, sizeof(int)); } -static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; - static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_fwd_netdev *priv = nft_expr_priv(expr); @@ -169,8 +167,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return nft_validate_register_load(priv->sreg_addr, addr_len); } -static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; - static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_fwd_neigh *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index a35fb59ace73..0b452fd470c4 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -6,10 +6,12 @@ struct nft_osf { enum nft_registers dreg:8; + u8 ttl; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = 
NLA_U32 }, + [NFTA_OSF_TTL] = { .type = NLA_U8 }, }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -33,7 +35,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - os_name = nf_osf_find(skb, nf_osf_fingers); + os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl); if (!os_name) strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); else @@ -46,6 +48,14 @@ static int nft_osf_init(const struct nft_ctx *ctx, { struct nft_osf *priv = nft_expr_priv(expr); int err; + u8 ttl; + + if (tb[NFTA_OSF_TTL]) { + ttl = nla_get_u8(tb[NFTA_OSF_TTL]); + if (ttl > 2) + return -EINVAL; + priv->ttl = ttl; + } priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, @@ -60,6 +70,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_osf *priv = nft_expr_priv(expr); + if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg)) goto nla_put_failure; diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 3cf71a2e375b..5322609f7662 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -118,12 +118,13 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode) static void nft_xfrm_state_get_key(const struct nft_xfrm *priv, struct nft_regs *regs, - const struct xfrm_state *state, - u8 family) + const struct xfrm_state *state) { u32 *dest = &regs->data[priv->dreg]; - if (!xfrm_state_addr_ok(priv->key, family, state->props.mode)) { + if (!xfrm_state_addr_ok(priv->key, + state->props.family, + state->props.mode)) { regs->verdict.code = NFT_BREAK; return; } @@ -169,7 +170,7 @@ static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv, } state = sp->xvec[priv->spnum]; - nft_xfrm_state_get_key(priv, regs, state, nft_pf(pkt)); + nft_xfrm_state_get_key(priv, regs, state); } static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv, @@ -184,7 +185,7 @@ static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv, if (i < priv->spnum) continue; - nft_xfrm_state_get_key(priv, regs, dst->xfrm, nft_pf(pkt)); + nft_xfrm_state_get_key(priv, regs, dst->xfrm); return; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index bf7bba80e24c..7a103553d10d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -40,14 +40,8 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) { - const struct xt_osf_info *info = p->matchinfo; - struct net *net = xt_net(p); - - if (!info) - return false; - return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p), - xt_out(p), info, net, nf_osf_fingers); + xt_out(p), p->matchinfo, xt_net(p), nf_osf_fingers); } static struct xt_match xt_osf_match = { diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index fceae245eb03..10d61a6eed71 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,6 +11,11 @@ #include <linux/netfilter/xt_quota.h> #include <linux/module.h> +struct xt_quota_priv { + spinlock_t lock; + uint64_t quota; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); MODULE_DESCRIPTION("Xtables: countdown quota match"); @@ -21,48 +26,54 @@ static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; - u64 current_count = atomic64_read(&q->counter); + struct xt_quota_priv *priv = q->master; bool ret = q->flags &
XT_QUOTA_INVERT; - u64 old_count, new_count; - - do { - if (current_count == 1) - return ret; - if (current_count <= skb->len) { - atomic64_set(&q->counter, 1); - return ret; - } - old_count = current_count; - new_count = current_count - skb->len; - current_count = atomic64_cmpxchg(&q->counter, old_count, - new_count); - } while (current_count != old_count); - return !ret; + + spin_lock_bh(&priv->lock); + if (priv->quota >= skb->len) { + priv->quota -= skb->len; + ret = !ret; + } else { + /* we do not allow even small packets from now on */ + priv->quota = 0; + } + spin_unlock_bh(&priv->lock); + + return ret; } static int quota_mt_check(const struct xt_mtchk_param *par) { struct xt_quota_info *q = par->matchinfo; - BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64)); - if (q->flags & ~XT_QUOTA_MASK) return -EINVAL; - if (atomic64_read(&q->counter) > q->quota + 1) - return -ERANGE; - if (atomic64_read(&q->counter) == 0) - atomic64_set(&q->counter, q->quota + 1); + q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); + if (q->master == NULL) + return -ENOMEM; + + spin_lock_init(&q->master->lock); + q->master->quota = q->quota; return 0; } +static void quota_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_quota_info *q = par->matchinfo; + + kfree(q->master); +} + static struct xt_match quota_mt_reg __read_mostly = { .name = "quota", .revision = 0, .family = NFPROTO_UNSPEC, .match = quota_mt, .checkentry = quota_mt_check, + .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), + .usersize = offsetof(struct xt_quota_info, master), .me = THIS_MODULE, }; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index e0d8ca03169a..44860505246d 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_connection *conn; - struct rxrpc_peer *peer; + struct rxrpc_peer *peer = NULL; struct rxrpc_call *call; _enter(""); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index cad0691c2bb4..0906e51d3cfb 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -139,7 +139,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) udp_sk(usk)->gro_complete = NULL; udp_encap_enable(); -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) if (local->srx.transport.family == AF_INET6) udpv6_encap_enable(); #endif diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0f0b499d1202..189418888839 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -572,7 +572,8 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.flags ^= RXRPC_CLIENT_INITIATED; whdr.flags &= RXRPC_CLIENT_INITIATED; - ret = kernel_sendmsg(local->socket, &msg, iov, 2, size); + ret = kernel_sendmsg(local->socket, &msg, + iov, ioc, size); if (ret < 0) trace_rxrpc_tx_fail(local->debug_id, 0, ret, rxrpc_tx_point_reject); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7feb611c7258..bc05af89fc38 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -197,6 +197,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + rxrpc_put_peer(peer); _leave(""); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 43c8559aca56..f427a1e00e7e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,8 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> 
+extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -1304,7 +1306,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1454,7 +1456,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1570,7 +1572,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh = NULL; int err; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -1937,7 +1939,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; @@ -2055,7 +2057,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, + err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, cb->extack); if (err) return err; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cf5c714ae786..022bca98bde6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1318,10 +1318,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) return 0; } -/* - * Delete/get qdisc. - */ - const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, [TCA_OPTIONS] = { .type = NLA_NESTED }, @@ -1334,6 +1330,10 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, }; +/* + * Delete/get qdisc. + */ + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -2070,7 +2070,8 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); - if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + if (q && q != root && + tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 297d9cf960b9..a827a1f562bf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1450,7 +1450,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) /* Get the lowest pmtu of all the transports. 
*/ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(t, sctp_dst_mtu(t->dst)); + sctp_transport_update_pmtu(t, + atomic_read(&t->mtu_info)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 9bbc5f92c941..5c36a99882ed 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -395,6 +395,7 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; if (sock_owned_by_user(sk)) { + atomic_set(&t->mtu_info, pmtu); asoc->pmtu_pending = 1; t->pmtu_pending = 1; return; diff --git a/net/sctp/output.c b/net/sctp/output.c index 7f849b01ec8e..67939ad99c01 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -120,6 +120,12 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, sctp_assoc_sync_pmtu(asoc); } + if (asoc->pmtu_pending) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + asoc->pmtu_pending = 0; + } + /* If there a is a prepend chunk stick it on the list before * any other chunks get appended. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 42191ed9902b..9cb854b05342 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -385,9 +385,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, asoc->outqueue.outstanding_bytes -= sctp_data_size(chk); } - msg_len -= SCTP_DATA_SNDSIZE(chk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); if (msg_len <= 0) break; } @@ -421,9 +419,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } - msg_len -= SCTP_DATA_SNDSIZE(chk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); if (msg_len <= 0) break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f73e9d38d5ba..fc0386e8ff23 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -83,7 +83,7 @@ #include <net/sctp/stream_sched.h> /* Forward declarations for internal helper functions. */ -static int sctp_writeable(struct sock *sk); +static bool sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len); @@ -119,25 +119,10 @@ static void sctp_enter_memory_pressure(struct sock *sk) /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { - int amt; + struct sock *sk = asoc->base.sk; - if (asoc->ep->sndbuf_policy) - amt = asoc->sndbuf_used; - else - amt = sk_wmem_alloc_get(asoc->base.sk); - - if (amt >= asoc->base.sk->sk_sndbuf) { - if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) - amt = 0; - else { - amt = sk_stream_wspace(asoc->base.sk); - if (amt < 0) - amt = 0; - } - } else { - amt = asoc->base.sk->sk_sndbuf - amt; - } - return amt; + return asoc->ep->sndbuf_policy ? sk->sk_sndbuf - asoc->sndbuf_used + : sk_stream_wspace(sk); } /* Increment the used sndbuf space count of the corresponding association by @@ -166,12 +151,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) /* Save the chunk pointer in skb for sctp_wfree to use later. 
*/ skb_shinfo(chunk->skb)->destructor_arg = chunk; - asoc->sndbuf_used += SCTP_DATA_SNDSIZE(chunk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); - refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); - sk->sk_wmem_queued += chunk->skb->truesize; + asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk); + sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk); sk_mem_charge(sk, chunk->skb->truesize); } @@ -271,11 +253,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } @@ -1928,10 +1909,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, asoc->pmtu_pending = 0; } - if (sctp_wspace(asoc) < msg_len) + if (sctp_wspace(asoc) < (int)msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); - if (!sctp_wspace(asoc)) { + if (sctp_wspace(asoc) <= 0) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) @@ -1946,8 +1927,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sp->strm_interleave) { timeo = sock_sndtimeo(sk, 0); err = sctp_wait_for_connect(asoc, &timeo); - if (err) + if (err) { + err = -ESRCH; goto err; + } } else { wait_connect = true; } @@ -7100,14 +7083,14 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); if (!asoc) goto out; - if (policy == SCTP_PR_SCTP_NONE) { + if (policy & SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -7159,7 +7142,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, } policy = params.sprstat_policy; - if (policy & ~SCTP_PR_SCTP_MASK) + if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL))) goto out; asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); @@ -7175,7 +7158,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, goto out; } - if (policy == SCTP_PR_SCTP_NONE) { + if (policy == SCTP_PR_SCTP_ALL) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { @@ -8460,17 +8443,11 @@ static void sctp_wfree(struct sk_buff *skb) struct sctp_association *asoc = chunk->asoc; struct sock *sk = asoc->base.sk; - asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk) + - sizeof(struct sk_buff) + - sizeof(struct sctp_chunk); - - WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc)); - - /* - * This undoes what is done via sctp_set_owner_w and sk_mem_charge - */ - sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk); + asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk); + WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), + &sk->sk_wmem_alloc)); if (chunk->shkey) { struct sctp_shared_key *shkey = chunk->shkey; @@ -8544,7 +8521,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, goto do_error; 
if (signal_pending(current)) goto do_interrupted; - if (msg_len <= sctp_wspace(asoc)) + if ((int)msg_len <= sctp_wspace(asoc)) break; /* Let another process have a go. Since we are going @@ -8619,14 +8596,9 @@ void sctp_write_space(struct sock *sk) * UDP-style sockets or TCP-style sockets, this code should work. * - Daisy */ -static int sctp_writeable(struct sock *sk) +static bool sctp_writeable(struct sock *sk) { - int amt = 0; - - amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amt < 0) - amt = 0; - return amt; + return sk->sk_sndbuf > sk->sk_wmem_queued; } /* Wait for an association to go into ESTABLISHED state. If timeout is 0, diff --git a/net/socket.c b/net/socket.c index 713dc4833d40..b68801c7d0ab 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2875,9 +2875,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return -EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } diff --git a/net/tipc/group.c b/net/tipc/group.c index e82f13cb2dc5..06fee142f09f 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -666,6 +666,7 @@ static void tipc_group_create_event(struct tipc_group *grp, struct sk_buff *skb; struct tipc_msg *hdr; + memset(&evt, 0, sizeof(evt)); evt.event = event; evt.found_lower = m->instance; evt.found_upper = m->instance; diff --git a/net/tipc/link.c b/net/tipc/link.c index f6552e4f4b43..201c3b5bc96b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1041,6 +1041,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, if (r->last_retransm != buf_seqno(skb)) { r->last_retransm = buf_seqno(skb); r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); + r->stale_cnt = 0; } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { link_retransmit_failure(l, skb); if (link_is_bc_sndlink(l)) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 51b4b96f89db..3cfeb9df64b0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_del(&publ->binding_node); + list_del_rcu(&publ->binding_node); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; - list_for_each_entry(publ, pls, binding_node) { + list_for_each_entry_rcu(publ, pls, binding_node) { /* Prepare next buffer: */ if (!skb) { skb = named_prepare_buf(net, PUBLICATION, msg_rem, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d1edfa3cad61..98d34fb61744 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -225,6 +225,8 @@ static inline void unix_release_addr(struct unix_address *addr) static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { + *hashp = 0; + if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 0577cd49aa72..07156f43d295 100644 --- 
a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -754,6 +754,8 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk->sk_destruct = xsk_destruct; sk_refcnt_debug_inc(sk); + sock_set_flag(sk, SOCK_RCU_FREE); + xs = xdp_sk(sk); mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 61be810389d8..ce66323102f9 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -13,7 +13,7 @@ static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { - return ntohl(addr->a6[2] ^ addr->a6[3]); + return jhash2((__force u32 *)addr->a6, 4, 0); } static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, @@ -26,8 +26,7 @@ static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, const xfrm_address_t *saddr) { - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); + return __xfrm6_addr_hash(daddr) ^ __xfrm6_addr_hash(saddr); } static inline u32 __bits2mask32(__u8 bits) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index be3520e429c9..684c0bc01e2c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -131,7 +131,7 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; - memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + memset(sp->ovec, 0, sizeof(sp->ovec)); if (src) { int i; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dc5b20bf29cf..d679fa0f44b3 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -116,6 +116,9 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) static void xfrmi_dev_free(struct net_device *dev) { + struct xfrm_if *xi = netdev_priv(dev); + + gro_cells_destroy(&xi->gro_cells); free_percpu(dev->tstats); } @@ -561,9 +564,6 @@ static void xfrmi_get_stats64(struct net_device *dev, { int cpu; - if (!dev->tstats) - return; - for_each_possible_cpu(cpu) { struct pcpu_sw_netstats *stats; struct pcpu_sw_netstats tmp; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f094d4b3520d..119a427d9b2b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -632,9 +632,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); @@ -774,9 +774,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, chain); + hlist_add_head_rcu(&policy->bydst, chain); __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define 
KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 07548de5c988..251be353f950 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_HPAGE_1M 156 #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 120037496f77..5afb11b30fca 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -36,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void) __tracing_path_set("", mnt); - return mnt; + return tracing_path; } static const char *tracing_path_debugfs_mount(void) @@ -49,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void) __tracing_path_set("tracing/", mnt); - return mnt; + return tracing_path; } const char *tracing_path_mount(void) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6d1a03c7523..e30d20fb482d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -833,7 +833,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5224ade3d5af..0be411695379 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -635,7 +635,7 @@ $(LIBPERF_IN): prepare FORCE $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 76e12bcd1765..b2188e623e22 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -981,6 +981,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e..635c09fda1d9 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", 
"MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd..8755693d86c6 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": 
"UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0cd42150f712..bc646185f8d9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } @@ -1560,26 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } -try_again: + al->map = map_groups__find(mg, al->addr); - if (al->map == NULL) { - /* - * If this is outside of all known maps, and is a negative - * address, try to look it up in the kernel dso, as it might be - * a vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in the - * "[vdso]" dso, but for now lets use the old trick of looking - * in the whole kernel symbol list. - */ - if (cpumode == PERF_RECORD_MISC_USER && machine && - mg != &machine->kmaps && - machine__kernel_ip(machine, al->addr)) { - mg = &machine->kmaps; - load_map = true; - goto try_again; - } - } else { + if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1a61628a1c12..e596ae358c4d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1089,6 +1089,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } + if (evsel->own_cpus) + evsel->attr.read_format |= PERF_FORMAT_ID; + /* * Apply event specific term settings, * it overloads any global configuration. 
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index afd68524ffa9..7799788f662f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -930,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; - - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); + int w; - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 09d6746e6ec8..e767c4a9d4d2 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso, struct symbol *inline_sym; char *demangled = NULL; + if (!funcname) + funcname = "??"; + if (dso) { demangled = dso__demangle_sym(dso, 0, funcname); if (demangled) diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh index a72df93cf1f8..128f0ab24307 100755 --- a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh +++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh @@ -141,6 +141,10 @@ echo "Import devices from localhost - should work" src/usbip attach -r localhost -b $busid; echo "==============================================================" +# Wait for sysfs file to be updated. Without this sleep, usbip port +# shows no imported devices. +sleep 3; + echo "List imported devices - expect to see imported devices"; src/usbip port; echo "==============================================================" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc new file mode 100644 index 000000000000..88e6c3f43006 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test synthetic_events syntax parser" + +echo > synthetic_events + +# synthetic event must have a field +! echo "myevent" >> synthetic_events +echo "myevent u64 var1" >> synthetic_events + +# synthetic event must be found in synthetic_events +grep "myevent[[:space:]]u64 var1" synthetic_events + +# it is not possible to add same name event +! 
echo "myevent u64 var2" >> synthetic_events + +# Non-append open will cleanup all events and add new one +echo "myevent u64 var2" > synthetic_events + +# multiple fields with different spaces +echo "myevent u64 var1; u64 var2;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events +echo "myevent u64 var1 ;u64 var2" > synthetic_events +grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events + +# test field types +echo "myevent u32 var" > synthetic_events +echo "myevent u16 var" > synthetic_events +echo "myevent u8 var" > synthetic_events +echo "myevent s64 var" > synthetic_events +echo "myevent s32 var" > synthetic_events +echo "myevent s16 var" > synthetic_events +echo "myevent s8 var" > synthetic_events + +echo "myevent char var" > synthetic_events +echo "myevent int var" > synthetic_events +echo "myevent long var" > synthetic_events +echo "myevent pid_t var" > synthetic_events + +echo "myevent unsigned char var" > synthetic_events +echo "myevent unsigned int var" > synthetic_events +echo "myevent unsigned long var" > synthetic_events +grep "myevent[[:space:]]unsigned long var" synthetic_events + +# test string type +echo "myevent char var[10]" > synthetic_events +grep "myevent[[:space:]]char\[10\] var" synthetic_events + +do_reset + +exit 0 diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index cad14cd0ea92..b5277106df1f 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -437,14 +437,19 @@ void enable_fastopen(void) } } -static struct rlimit rlim_old, rlim_new; +static struct rlimit rlim_old; static __attribute__((constructor)) void main_ctor(void) { getrlimit(RLIMIT_MEMLOCK, &rlim_old); - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); - setrlimit(RLIMIT_MEMLOCK, &rlim_new); + + if (rlim_old.rlim_cur != RLIM_INFINITY) { + struct rlimit rlim_new; + + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } } static __attribute__((destructor)) void main_dtor(void) |