From fff200caf6f9179dd9a7fc67acd659e614c3f72f Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 10 May 2018 16:28:35 +0900 Subject: e1000e: Ignore TSYNCRXCTL when getting I219 clock attributes There have been multiple reports of crashes that look like kernel: RIP: 0010:[] timecounter_read+0xf/0x50 [...] kernel: Call Trace: kernel: [] e1000e_phc_gettime+0x2f/0x60 [e1000e] kernel: [] e1000e_systim_overflow_work+0x1d/0x80 [e1000e] kernel: [] process_one_work+0x155/0x440 kernel: [] worker_thread+0x116/0x4b0 kernel: [] kthread+0xd2/0xf0 kernel: [] ret_from_fork+0x3f/0x70 These can be traced back to the fact that e1000e_systim_reset() skips the timecounter_init() call if e1000e_get_base_timinca() returns -EINVAL, which leads to a null deref in timecounter_read(). Commit 83129b37ef35 ("e1000e: fix systim issues", v4.2-rc1) reworked e1000e_get_base_timinca() in such a way that it can return -EINVAL for e1000_pch_spt if the SYSCFI bit is not set in TSYNCRXCTL. Some experimentation has shown that on I219 (e1000_pch_spt, "MAC: 12") adapters, the E1000_TSYNCRXCTL_SYSCFI flag is unstable; TSYNCRXCTL reads sometimes don't have the SYSCFI bit set. Retrying the read shortly after finds the bit to be set. This was observed at boot (probe) but also link up and link down. Moreover, the phc (PTP Hardware Clock) seems to operate normally even after reads where SYSCFI=0. Therefore, remove this register read and unconditionally set the clock parameters. Reported-by: Achim Mildenberger Message-Id: <20180425065243.g5mqewg5irkwgwgv@f2> Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1075876 Fixes: 83129b37ef35 ("e1000e: fix systim issues") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d3fef7fefea8..acf1e8b52b8e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3527,15 +3527,12 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) } break; case e1000_pch_spt: - if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { - /* Stable 24MHz frequency */ - incperiod = INCPERIOD_24MHZ; - incvalue = INCVALUE_24MHZ; - shift = INCVALUE_SHIFT_24MHZ; - adapter->cc.shift = shift; - break; - } - return -EINVAL; + /* Stable 24MHz frequency */ + incperiod = INCPERIOD_24MHZ; + incvalue = INCVALUE_24MHZ; + shift = INCVALUE_SHIFT_24MHZ; + adapter->cc.shift = shift; + break; case e1000_pch_cnp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ -- cgit From 85d63445f41125dafeddda74e5b13b7eefac9407 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 10 May 2018 12:20:13 -0700 Subject: Documentation: e100: Update the Intel 10/100 driver doc Over the years, several of the links have changed or are no longer valid so update them. In addition, the default values were incorrect for a couple of parameters. Converted the text file to the reStructuredText (RST) format, since the Linux kernel documentation now uses this format for documentation. Signed-off-by: Jeff Kirsher Tested-by: Aaron Brown --- Documentation/networking/e100.rst | 177 +++++++++++++++++++++++++++++++++++ Documentation/networking/e100.txt | 183 ------------------------------------- Documentation/networking/index.rst | 1 + MAINTAINERS | 2 +- 4 files changed, 179 insertions(+), 184 deletions(-) create mode 100644 Documentation/networking/e100.rst delete mode 100644 Documentation/networking/e100.txt diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst new file mode 100644 index 000000000000..d4d837027925 --- /dev/null +++ b/Documentation/networking/e100.rst @@ -0,0 +1,177 @@ +Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters +============================================================== + +June 1, 2018 + +Contents +======== + +- In This Release +- Identifying Your Adapter +- Building and Installation +- Driver Configuration Parameters +- Additional Configurations +- Known Issues +- Support + + +In This Release +=============== + +This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of +Adapters. This driver includes support for Itanium(R)2-based systems. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel PRO/100 adapter. + +The following features are now available in supported kernels: + - Native VLANs + - Channel Bonding (teaming) + - SNMP + +Channel Bonding documentation can be found in the Linux kernel source: +/Documentation/networking/bonding.txt + + +Identifying Your Adapter +======================== + +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + +Driver Configuration Parameters +=============================== + +The default value for each parameter is generally the recommended setting, +unless otherwise noted. + +Rx Descriptors: Number of receive descriptors. A receive descriptor is a data + structure that describes a receive buffer and its attributes to the network + controller. The data in the descriptor is used by the controller to write + data from the controller to host memory. In the 3.x.x driver the valid range + for this parameter is 64-256. The default value is 256. This parameter can be + changed using the command:: + + ethtool -G eth? rx n + + Where n is the number of desired Rx descriptors. + +Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data + structure that describes a transmit buffer and its attributes to the network + controller. The data in the descriptor is used by the controller to read + data from the host memory to the controller. In the 3.x.x driver the valid + range for this parameter is 64-256. The default value is 128. This parameter + can be changed using the command:: + + ethtool -G eth? tx n + + Where n is the number of desired Tx descriptors. + +Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by + default. The ethtool utility can be used as follows to force speed/duplex.:: + + ethtool -s eth? autoneg off speed {10|100} duplex {full|half} + + NOTE: setting the speed/duplex to incorrect values will cause the link to + fail. + +Event Log Message Level: The driver uses the message level flag to log events + to syslog. The message level can be set at driver load time. It can also be + set using the command:: + + ethtool -s eth? msglvl n + + +Additional Configurations +========================= + + Configuring the Driver on Different Distributions + ------------------------------------------------- + + Configuring a network driver to load properly when the system is started is + distribution dependent. Typically, the configuration process involves adding + an alias line to /etc/modprobe.d/*.conf as well as editing other system + startup scripts and/or configuration files. Many popular Linux + distributions ship with tools to make these changes for you. To learn the + proper way to configure a network device for your system, refer to your + distribution documentation. If during this process you are asked for the + driver or module name, the name for the Linux Base Driver for the Intel + PRO/100 Family of Adapters is e100. + + As an example, if you install the e100 driver for two PRO/100 adapters + (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/ + + alias eth0 e100 + alias eth1 e100 + + Viewing Link Messages + --------------------- + In order to see link messages and other Intel driver information on your + console, you must set the dmesg level up to six. This can be done by + entering the following on the command line before loading the e100 driver:: + + dmesg -n 6 + + If you wish to see all messages issued by the driver, including debug + messages, set the dmesg level to eight. + + NOTE: This setting is not saved across reboots. + + + ethtool + ------- + + The driver utilizes the ethtool interface for driver configuration and + diagnostics, as well as displaying statistical information. The ethtool + version 1.6 or later is required for this functionality. + + The latest release of ethtool can be found from + https://www.kernel.org/pub/software/network/ethtool/ + + Enabling Wake on LAN* (WoL) + --------------------------- + WoL is provided through the ethtool* utility. For instructions on enabling + WoL with ethtool, refer to the ethtool man page. + + WoL will be enabled on the system during the next shut down or reboot. For + this driver version, in order to enable WoL, the e100 driver must be + loaded when shutting down or rebooting the system. + + NAPI + ---- + + NAPI (Rx polling mode) is supported in the e100 driver. + + See https://wiki.linuxfoundation.org/networking/napi for more information + on NAPI. + + Multiple Interfaces on Same Ethernet Broadcast Network + ------------------------------------------------------ + + Due to the default ARP behavior on Linux, it is not possible to have + one system on two IP networks in the same Ethernet broadcast domain + (non-partitioned switch) behave as expected. All Ethernet interfaces + will respond to IP traffic for any IP address assigned to the system. + This results in unbalanced receive traffic. + + If you have multiple interfaces in a server, either turn on ARP + filtering by + + (1) entering:: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter + (this only works if your kernel's version is higher than 2.4.5), or + + (2) installing the interfaces in separate broadcast domains (either + in different switches or in a switch partitioned to VLANs). + + +Support +======= +For general information, go to the Intel support website at: +http://www.intel.com/support/ + +or the Intel Wired Networking project hosted by Sourceforge at: +http://sourceforge.net/projects/e1000 +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to e1000-devel@lists.sf.net. diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt deleted file mode 100644 index 54810b82c01a..000000000000 --- a/Documentation/networking/e100.txt +++ /dev/null @@ -1,183 +0,0 @@ -Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters -============================================================== - -March 15, 2011 - -Contents -======== - -- In This Release -- Identifying Your Adapter -- Building and Installation -- Driver Configuration Parameters -- Additional Configurations -- Known Issues -- Support - - -In This Release -=============== - -This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of -Adapters. This driver includes support for Itanium(R)2-based systems. - -For questions related to hardware requirements, refer to the documentation -supplied with your Intel PRO/100 adapter. - -The following features are now available in supported kernels: - - Native VLANs - - Channel Bonding (teaming) - - SNMP - -Channel Bonding documentation can be found in the Linux kernel source: -/Documentation/networking/bonding.txt - - -Identifying Your Adapter -======================== - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/network/adapter/pro100/21397.htm - -For the latest Intel network drivers for Linux, refer to the following -website. In the search field, enter your adapter name or type, or use the -networking link on the left to search for your adapter: - - http://downloadfinder.intel.com/scripts-df/support_intel.asp - -Driver Configuration Parameters -=============================== - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -Rx Descriptors: Number of receive descriptors. A receive descriptor is a data - structure that describes a receive buffer and its attributes to the network - controller. The data in the descriptor is used by the controller to write - data from the controller to host memory. In the 3.x.x driver the valid range - for this parameter is 64-256. The default value is 64. This parameter can be - changed using the command: - - ethtool -G eth? rx n, where n is the number of desired rx descriptors. - -Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data - structure that describes a transmit buffer and its attributes to the network - controller. The data in the descriptor is used by the controller to read - data from the host memory to the controller. In the 3.x.x driver the valid - range for this parameter is 64-256. The default value is 64. This parameter - can be changed using the command: - - ethtool -G eth? tx n, where n is the number of desired tx descriptors. - -Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by - default. The ethtool utility can be used as follows to force speed/duplex. - - ethtool -s eth? autoneg off speed {10|100} duplex {full|half} - - NOTE: setting the speed/duplex to incorrect values will cause the link to - fail. - -Event Log Message Level: The driver uses the message level flag to log events - to syslog. The message level can be set at driver load time. It can also be - set using the command: - - ethtool -s eth? msglvl n - - -Additional Configurations -========================= - - Configuring the Driver on Different Distributions - ------------------------------------------------- - - Configuring a network driver to load properly when the system is started is - distribution dependent. Typically, the configuration process involves adding - an alias line to /etc/modprobe.d/*.conf as well as editing other system - startup scripts and/or configuration files. Many popular Linux - distributions ship with tools to make these changes for you. To learn the - proper way to configure a network device for your system, refer to your - distribution documentation. If during this process you are asked for the - driver or module name, the name for the Linux Base Driver for the Intel - PRO/100 Family of Adapters is e100. - - As an example, if you install the e100 driver for two PRO/100 adapters - (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/ - - alias eth0 e100 - alias eth1 e100 - - Viewing Link Messages - --------------------- - In order to see link messages and other Intel driver information on your - console, you must set the dmesg level up to six. This can be done by - entering the following on the command line before loading the e100 driver: - - dmesg -n 8 - - If you wish to see all messages issued by the driver, including debug - messages, set the dmesg level to eight. - - NOTE: This setting is not saved across reboots. - - - ethtool - ------- - - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The ethtool - version 1.6 or later is required for this functionality. - - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool/ - - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is provided through the ethtool* utility. For instructions on enabling - WoL with ethtool, refer to the ethtool man page. - - WoL will be enabled on the system during the next shut down or reboot. For - this driver version, in order to enable WoL, the e100 driver must be - loaded when shutting down or rebooting the system. - - NAPI - ---- - - NAPI (Rx polling mode) is supported in the e100 driver. - - See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI. - - Multiple Interfaces on Same Ethernet Broadcast Network - ------------------------------------------------------ - - Due to the default ARP behavior on Linux, it is not possible to have - one system on two IP networks in the same Ethernet broadcast domain - (non-partitioned switch) behave as expected. All Ethernet interfaces - will respond to IP traffic for any IP address assigned to the system. - This results in unbalanced receive traffic. - - If you have multiple interfaces in a server, either turn on ARP - filtering by - - (1) entering: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter - (this only works if your kernel's version is higher than 2.4.5), or - - (2) installing the interfaces in separate broadcast domains (either - in different switches or in a switch partitioned to VLANs). - - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - - or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related to the -issue to e1000-devel@lists.sourceforge.net. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index cbd9bdd4a79e..d11a62977edd 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -10,6 +10,7 @@ Contents: batman-adv can dpaa2/index + e100 kapi z8530book msg_zerocopy diff --git a/MAINTAINERS b/MAINTAINERS index 0ae0dbf0e15e..d68981ca9896 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7089,7 +7089,7 @@ Q: http://patchwork.ozlabs.org/project/intel-wired-lan/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git S: Supported -F: Documentation/networking/e100.txt +F: Documentation/networking/e100.rst F: Documentation/networking/e1000.txt F: Documentation/networking/e1000e.txt F: Documentation/networking/igb.txt -- cgit From 228046e76189ce542f15321b760e380551468017 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 10 May 2018 12:55:38 -0700 Subject: Documentation: e1000: Update kernel documentation Updated the e1000.txt kernel documentation with the latest information. Also convert the text file to reStructuredText (RST) format, since the Linux kernel documentation now uses this format for documentation. Signed-off-by: Jeff Kirsher Tested-by: Aaron Brown --- Documentation/networking/e1000.rst | 422 +++++++++++++++++++++++++++++++++ Documentation/networking/e1000.txt | 461 ------------------------------------- Documentation/networking/index.rst | 1 + MAINTAINERS | 2 +- 4 files changed, 424 insertions(+), 462 deletions(-) create mode 100644 Documentation/networking/e1000.rst delete mode 100644 Documentation/networking/e1000.txt diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst new file mode 100644 index 000000000000..616848940e63 --- /dev/null +++ b/Documentation/networking/e1000.rst @@ -0,0 +1,422 @@ +Linux* Base Driver for Intel(R) Ethernet Network Connection +=========================================================== + +Intel Gigabit Linux driver. +Copyright(c) 1999 - 2013 Intel Corporation. + +Contents +======== + +- Identifying Your Adapter +- Command Line Parameters +- Speed and Duplex Configuration +- Additional Configurations +- Support + +Identifying Your Adapter +======================== + +For more information on how to identify your adapter, go to the Adapter & +Driver ID Guide at: + + http://support.intel.com/support/go/network/adapter/idguide.htm + +For the latest Intel network drivers for Linux, refer to the following +website. In the search field, enter your adapter name or type, or use the +networking link on the left to search for your adapter: + + http://support.intel.com/support/go/network/adapter/home.htm + +Command Line Parameters +======================= + +The default value for each parameter is generally the recommended setting, +unless otherwise noted. + +NOTES: For more information about the AutoNeg, Duplex, and Speed + parameters, see the "Speed and Duplex Configuration" section in + this document. + + For more information about the InterruptThrottleRate, + RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay + parameters, see the application note at: + http://www.intel.com/design/network/applnots/ap450.htm + +AutoNeg +------- +(Supported only on adapters with copper connections) +Valid Range: 0x01-0x0F, 0x20-0x2F +Default Value: 0x2F + +This parameter is a bit-mask that specifies the speed and duplex settings +advertised by the adapter. When this parameter is used, the Speed and +Duplex parameters must not be specified. + +NOTE: Refer to the Speed and Duplex section of this readme for more + information on the AutoNeg parameter. + +Duplex +------ +(Supported only on adapters with copper connections) +Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) +Default Value: 0 + +This defines the direction in which data is allowed to flow. Can be +either one or two-directional. If both Duplex and the link partner are +set to auto-negotiate, the board auto-detects the correct duplex. If the +link partner is forced (either full or half), Duplex defaults to half- +duplex. + +FlowControl +----------- +Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) +Default Value: Reads flow control settings from the EEPROM + +This parameter controls the automatic generation(Tx) and response(Rx) +to Ethernet PAUSE frames. + +InterruptThrottleRate +--------------------- +(not supported on Intel(R) 82542, 82543 or 82544-based adapters) +Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative, + 4=simplified balancing) +Default Value: 3 + +The driver can limit the amount of interrupts per second that the adapter +will generate for incoming packets. It does this by writing a value to the +adapter that is based on the maximum amount of interrupts that the adapter +will generate per second. + +Setting InterruptThrottleRate to a value greater or equal to 100 +will program the adapter to send out a maximum of that many interrupts +per second, even if more packets have come in. This reduces interrupt +load on the system and can lower CPU utilization under heavy load, +but will increase latency as packets are not processed as quickly. + +The default behaviour of the driver previously assumed a static +InterruptThrottleRate value of 8000, providing a good fallback value for +all traffic types,but lacking in small packet performance and latency. +The hardware can handle many more small packets per second however, and +for this reason an adaptive interrupt moderation algorithm was implemented. + +Since 7.3.x, the driver has two adaptive modes (setting 1 or 3) in which +it dynamically adjusts the InterruptThrottleRate value based on the traffic +that it receives. After determining the type of incoming traffic in the last +timeframe, it will adjust the InterruptThrottleRate to an appropriate value +for that traffic. + +The algorithm classifies the incoming traffic every interval into +classes. Once the class is determined, the InterruptThrottleRate value is +adjusted to suit that traffic type the best. There are three classes defined: +"Bulk traffic", for large amounts of packets of normal size; "Low latency", +for small amounts of traffic and/or a significant percentage of small +packets; and "Lowest latency", for almost completely small packets or +minimal traffic. + +In dynamic conservative mode, the InterruptThrottleRate value is set to 4000 +for traffic that falls in class "Bulk traffic". If traffic falls in the "Low +latency" or "Lowest latency" class, the InterruptThrottleRate is increased +stepwise to 20000. This default mode is suitable for most applications. + +For situations where low latency is vital such as cluster or +grid computing, the algorithm can reduce latency even more when +InterruptThrottleRate is set to mode 1. In this mode, which operates +the same as mode 3, the InterruptThrottleRate will be increased stepwise to +70000 for traffic in class "Lowest latency". + +In simplified mode the interrupt rate is based on the ratio of TX and +RX traffic. If the bytes per second rate is approximately equal, the +interrupt rate will drop as low as 2000 interrupts per second. If the +traffic is mostly transmit or mostly receive, the interrupt rate could +be as high as 8000. + +Setting InterruptThrottleRate to 0 turns off any interrupt moderation +and may improve small packet latency, but is generally not suitable +for bulk throughput traffic. + +NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and + RxAbsIntDelay parameters. In other words, minimizing the receive + and/or transmit absolute delays does not force the controller to + generate more interrupts than what the Interrupt Throttle Rate + allows. + +CAUTION: If you are using the Intel(R) PRO/1000 CT Network Connection + (controller 82547), setting InterruptThrottleRate to a value + greater than 75,000, may hang (stop transmitting) adapters + under certain network conditions. If this occurs a NETDEV + WATCHDOG message is logged in the system event log. In + addition, the controller is automatically reset, restoring + the network connection. To eliminate the potential for the + hang, ensure that InterruptThrottleRate is set no greater + than 75,000 and is not set to 0. + +NOTE: When e1000 is loaded with default settings and multiple adapters + are in use simultaneously, the CPU utilization may increase non- + linearly. In order to limit the CPU utilization without impacting + the overall throughput, we recommend that you load the driver as + follows:: + + modprobe e1000 InterruptThrottleRate=3000,3000,3000 + + This sets the InterruptThrottleRate to 3000 interrupts/sec for + the first, second, and third instances of the driver. The range + of 2000 to 3000 interrupts per second works on a majority of + systems and is a good starting point, but the optimal value will + be platform-specific. If CPU utilization is not a concern, use + RX_POLLING (NAPI) and default driver settings. + +RxDescriptors +------------- +Valid Range: 48-256 for 82542 and 82543-based adapters + 48-4096 for all other supported adapters +Default Value: 256 + +This value specifies the number of receive buffer descriptors allocated +by the driver. Increasing this value allows the driver to buffer more +incoming packets, at the expense of increased system memory utilization. + +Each descriptor is 16 bytes. A receive buffer is also allocated for each +descriptor and can be either 2048, 4096, 8192, or 16384 bytes, depending +on the MTU setting. The maximum MTU size is 16110. + +NOTE: MTU designates the frame size. It only needs to be set for Jumbo + Frames. Depending on the available system resources, the request + for a higher number of receive descriptors may be denied. In this + case, use a lower number. + +RxIntDelay +---------- +Valid Range: 0-65535 (0=off) +Default Value: 0 + +This value delays the generation of receive interrupts in units of 1.024 +microseconds. Receive interrupt reduction can improve CPU efficiency if +properly tuned for specific network traffic. Increasing this value adds +extra latency to frame reception and can end up decreasing the throughput +of TCP traffic. If the system is reporting dropped receives, this value +may be set too high, causing the driver to run out of available receive +descriptors. + +CAUTION: When setting RxIntDelay to a value other than 0, adapters may + hang (stop transmitting) under certain network conditions. If + this occurs a NETDEV WATCHDOG message is logged in the system + event log. In addition, the controller is automatically reset, + restoring the network connection. To eliminate the potential + for the hang ensure that RxIntDelay is set to 0. + +RxAbsIntDelay +------------- +(This parameter is supported only on 82540, 82545 and later adapters.) +Valid Range: 0-65535 (0=off) +Default Value: 128 + +This value, in units of 1.024 microseconds, limits the delay in which a +receive interrupt is generated. Useful only if RxIntDelay is non-zero, +this value ensures that an interrupt is generated after the initial +packet is received within the set amount of time. Proper tuning, +along with RxIntDelay, may improve traffic throughput in specific network +conditions. + +Speed +----- +(This parameter is supported only on adapters with copper connections.) +Valid Settings: 0, 10, 100, 1000 +Default Value: 0 (auto-negotiate at all supported speeds) + +Speed forces the line speed to the specified value in megabits per second +(Mbps). If this parameter is not specified or is set to 0 and the link +partner is set to auto-negotiate, the board will auto-detect the correct +speed. Duplex should also be set when Speed is set to either 10 or 100. + +TxDescriptors +------------- +Valid Range: 48-256 for 82542 and 82543-based adapters + 48-4096 for all other supported adapters +Default Value: 256 + +This value is the number of transmit descriptors allocated by the driver. +Increasing this value allows the driver to queue more transmits. Each +descriptor is 16 bytes. + +NOTE: Depending on the available system resources, the request for a + higher number of transmit descriptors may be denied. In this case, + use a lower number. + +TxIntDelay +---------- +Valid Range: 0-65535 (0=off) +Default Value: 8 + +This value delays the generation of transmit interrupts in units of +1.024 microseconds. Transmit interrupt reduction can improve CPU +efficiency if properly tuned for specific network traffic. If the +system is reporting dropped transmits, this value may be set too high +causing the driver to run out of available transmit descriptors. + +TxAbsIntDelay +------------- +(This parameter is supported only on 82540, 82545 and later adapters.) +Valid Range: 0-65535 (0=off) +Default Value: 32 + +This value, in units of 1.024 microseconds, limits the delay in which a +transmit interrupt is generated. Useful only if TxIntDelay is non-zero, +this value ensures that an interrupt is generated after the initial +packet is sent on the wire within the set amount of time. Proper tuning, +along with TxIntDelay, may improve traffic throughput in specific +network conditions. + +XsumRX +------ +(This parameter is NOT supported on the 82542-based adapter.) +Valid Range: 0-1 +Default Value: 1 + +A value of '1' indicates that the driver should enable IP checksum +offload for received packets (both UDP and TCP) to the adapter hardware. + +Copybreak +--------- +Valid Range: 0-xxxxxxx (0=off) +Default Value: 256 +Usage: modprobe e1000.ko copybreak=128 + +Driver copies all packets below or equaling this size to a fresh RX +buffer before handing it up the stack. + +This parameter is different than other parameters, in that it is a +single (not 1,1,1 etc.) parameter applied to all driver instances and +it is also available during runtime at +/sys/module/e1000/parameters/copybreak + +SmartPowerDownEnable +-------------------- +Valid Range: 0-1 +Default Value: 0 (disabled) + +Allows PHY to turn off in lower power states. The user can turn off +this parameter in supported chipsets. + +Speed and Duplex Configuration +============================== + +Three keywords are used to control the speed and duplex configuration. +These keywords are Speed, Duplex, and AutoNeg. + +If the board uses a fiber interface, these keywords are ignored, and the +fiber interface board only links at 1000 Mbps full-duplex. + +For copper-based boards, the keywords interact as follows: + + The default operation is auto-negotiate. The board advertises all + supported speed and duplex combinations, and it links at the highest + common speed and duplex mode IF the link partner is set to auto-negotiate. + + If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps + is advertised (The 1000BaseT spec requires auto-negotiation.) + + If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- + negotiation is disabled, and the AutoNeg parameter is ignored. Partner + SHOULD also be forced. + +The AutoNeg parameter is used when more control is required over the +auto-negotiation process. It should be used when you wish to control which +speed and duplex combinations are advertised during the auto-negotiation +process. + +The parameter may be specified as either a decimal or hexadecimal value as +determined by the bitmap below. + +Bit position 7 6 5 4 3 2 1 0 +Decimal Value 128 64 32 16 8 4 2 1 +Hex value 80 40 20 10 8 4 2 1 +Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 +Duplex Full Full Half Full Half + +Some examples of using AutoNeg: + + modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half) + modprobe e1000 AutoNeg=1 (Same as above) + modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full) + modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full) + modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half) + modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100 + Half) + modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full) + modprobe e1000 AutoNeg=32 (Same as above) + +Note that when this parameter is used, Speed and Duplex must not be specified. + +If the link partner is forced to a specific speed and duplex, then this +parameter should not be used. Instead, use the Speed and Duplex parameters +previously mentioned to force the adapter to the same speed and duplex. + +Additional Configurations +========================= + + Jumbo Frames + ------------ + Jumbo Frames support is enabled by changing the MTU to a value larger than + the default of 1500. Use the ifconfig command to increase the MTU size. + For example:: + + ifconfig eth mtu 9000 up + + This setting is not saved across reboots. It can be made permanent if + you add:: + + MTU=9000 + + to the file /etc/sysconfig/network-scripts/ifcfg-eth. This example + applies to the Red Hat distributions; other distributions may store this + setting in a different location. + + Notes: + Degradation in throughput performance may be observed in some Jumbo frames + environments. If this is observed, increasing the application's socket buffer + size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. + See the specific application manual and /usr/src/linux*/Documentation/ + networking/ip-sysctl.txt for more details. + + - The maximum MTU setting for Jumbo Frames is 16110. This value coincides + with the maximum Jumbo Frames size of 16128. + + - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in + poor performance or loss of link. + + - Adapters based on the Intel(R) 82542 and 82573V/E controller do not + support Jumbo Frames. These correspond to the following product names: + Intel(R) PRO/1000 Gigabit Server Adapter + Intel(R) PRO/1000 PM Network Connection + + ethtool + ------- + The driver utilizes the ethtool interface for driver configuration and + diagnostics, as well as displaying statistical information. The ethtool + version 1.6 or later is required for this functionality. + + The latest release of ethtool can be found from + https://www.kernel.org/pub/software/network/ethtool/ + + Enabling Wake on LAN* (WoL) + --------------------------- + WoL is configured through the ethtool* utility. + + WoL will be enabled on the system during the next shut down or reboot. + For this driver version, in order to enable WoL, the e1000 driver must be + loaded when shutting down or rebooting the system. + +Support +======= + +For general information, go to the Intel support website at: + + http://support.intel.com + +or the Intel Wired Networking project hosted by Sourceforge at: + + http://sourceforge.net/projects/e1000 + +If an issue is identified with the released source code on the supported +kernel with a supported adapter, email the specific information related +to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt deleted file mode 100644 index 1f6ed848363d..000000000000 --- a/Documentation/networking/e1000.txt +++ /dev/null @@ -1,461 +0,0 @@ -Linux* Base Driver for Intel(R) Ethernet Network Connection -=========================================================== - -Intel Gigabit Linux driver. -Copyright(c) 1999 - 2013 Intel Corporation. - -Contents -======== - -- Identifying Your Adapter -- Command Line Parameters -- Speed and Duplex Configuration -- Additional Configurations -- Support - -Identifying Your Adapter -======================== - -For more information on how to identify your adapter, go to the Adapter & -Driver ID Guide at: - - http://support.intel.com/support/go/network/adapter/idguide.htm - -For the latest Intel network drivers for Linux, refer to the following -website. In the search field, enter your adapter name or type, or use the -networking link on the left to search for your adapter: - - http://support.intel.com/support/go/network/adapter/home.htm - -Command Line Parameters -======================= - -The default value for each parameter is generally the recommended setting, -unless otherwise noted. - -NOTES: For more information about the AutoNeg, Duplex, and Speed - parameters, see the "Speed and Duplex Configuration" section in - this document. - - For more information about the InterruptThrottleRate, - RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay - parameters, see the application note at: - http://www.intel.com/design/network/applnots/ap450.htm - -AutoNeg -------- -(Supported only on adapters with copper connections) -Valid Range: 0x01-0x0F, 0x20-0x2F -Default Value: 0x2F - -This parameter is a bit-mask that specifies the speed and duplex settings -advertised by the adapter. When this parameter is used, the Speed and -Duplex parameters must not be specified. - -NOTE: Refer to the Speed and Duplex section of this readme for more - information on the AutoNeg parameter. - -Duplex ------- -(Supported only on adapters with copper connections) -Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full) -Default Value: 0 - -This defines the direction in which data is allowed to flow. Can be -either one or two-directional. If both Duplex and the link partner are -set to auto-negotiate, the board auto-detects the correct duplex. If the -link partner is forced (either full or half), Duplex defaults to half- -duplex. - -FlowControl ------------ -Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx) -Default Value: Reads flow control settings from the EEPROM - -This parameter controls the automatic generation(Tx) and response(Rx) -to Ethernet PAUSE frames. - -InterruptThrottleRate ---------------------- -(not supported on Intel(R) 82542, 82543 or 82544-based adapters) -Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative, - 4=simplified balancing) -Default Value: 3 - -The driver can limit the amount of interrupts per second that the adapter -will generate for incoming packets. It does this by writing a value to the -adapter that is based on the maximum amount of interrupts that the adapter -will generate per second. - -Setting InterruptThrottleRate to a value greater or equal to 100 -will program the adapter to send out a maximum of that many interrupts -per second, even if more packets have come in. This reduces interrupt -load on the system and can lower CPU utilization under heavy load, -but will increase latency as packets are not processed as quickly. - -The default behaviour of the driver previously assumed a static -InterruptThrottleRate value of 8000, providing a good fallback value for -all traffic types,but lacking in small packet performance and latency. -The hardware can handle many more small packets per second however, and -for this reason an adaptive interrupt moderation algorithm was implemented. - -Since 7.3.x, the driver has two adaptive modes (setting 1 or 3) in which -it dynamically adjusts the InterruptThrottleRate value based on the traffic -that it receives. After determining the type of incoming traffic in the last -timeframe, it will adjust the InterruptThrottleRate to an appropriate value -for that traffic. - -The algorithm classifies the incoming traffic every interval into -classes. Once the class is determined, the InterruptThrottleRate value is -adjusted to suit that traffic type the best. There are three classes defined: -"Bulk traffic", for large amounts of packets of normal size; "Low latency", -for small amounts of traffic and/or a significant percentage of small -packets; and "Lowest latency", for almost completely small packets or -minimal traffic. - -In dynamic conservative mode, the InterruptThrottleRate value is set to 4000 -for traffic that falls in class "Bulk traffic". If traffic falls in the "Low -latency" or "Lowest latency" class, the InterruptThrottleRate is increased -stepwise to 20000. This default mode is suitable for most applications. - -For situations where low latency is vital such as cluster or -grid computing, the algorithm can reduce latency even more when -InterruptThrottleRate is set to mode 1. In this mode, which operates -the same as mode 3, the InterruptThrottleRate will be increased stepwise to -70000 for traffic in class "Lowest latency". - -In simplified mode the interrupt rate is based on the ratio of TX and -RX traffic. If the bytes per second rate is approximately equal, the -interrupt rate will drop as low as 2000 interrupts per second. If the -traffic is mostly transmit or mostly receive, the interrupt rate could -be as high as 8000. - -Setting InterruptThrottleRate to 0 turns off any interrupt moderation -and may improve small packet latency, but is generally not suitable -for bulk throughput traffic. - -NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and - RxAbsIntDelay parameters. In other words, minimizing the receive - and/or transmit absolute delays does not force the controller to - generate more interrupts than what the Interrupt Throttle Rate - allows. - -CAUTION: If you are using the Intel(R) PRO/1000 CT Network Connection - (controller 82547), setting InterruptThrottleRate to a value - greater than 75,000, may hang (stop transmitting) adapters - under certain network conditions. If this occurs a NETDEV - WATCHDOG message is logged in the system event log. In - addition, the controller is automatically reset, restoring - the network connection. To eliminate the potential for the - hang, ensure that InterruptThrottleRate is set no greater - than 75,000 and is not set to 0. - -NOTE: When e1000 is loaded with default settings and multiple adapters - are in use simultaneously, the CPU utilization may increase non- - linearly. In order to limit the CPU utilization without impacting - the overall throughput, we recommend that you load the driver as - follows: - - modprobe e1000 InterruptThrottleRate=3000,3000,3000 - - This sets the InterruptThrottleRate to 3000 interrupts/sec for - the first, second, and third instances of the driver. The range - of 2000 to 3000 interrupts per second works on a majority of - systems and is a good starting point, but the optimal value will - be platform-specific. If CPU utilization is not a concern, use - RX_POLLING (NAPI) and default driver settings. - -RxDescriptors -------------- -Valid Range: 80-256 for 82542 and 82543-based adapters - 80-4096 for all other supported adapters -Default Value: 256 - -This value specifies the number of receive buffer descriptors allocated -by the driver. Increasing this value allows the driver to buffer more -incoming packets, at the expense of increased system memory utilization. - -Each descriptor is 16 bytes. A receive buffer is also allocated for each -descriptor and can be either 2048, 4096, 8192, or 16384 bytes, depending -on the MTU setting. The maximum MTU size is 16110. - -NOTE: MTU designates the frame size. It only needs to be set for Jumbo - Frames. Depending on the available system resources, the request - for a higher number of receive descriptors may be denied. In this - case, use a lower number. - -RxIntDelay ----------- -Valid Range: 0-65535 (0=off) -Default Value: 0 - -This value delays the generation of receive interrupts in units of 1.024 -microseconds. Receive interrupt reduction can improve CPU efficiency if -properly tuned for specific network traffic. Increasing this value adds -extra latency to frame reception and can end up decreasing the throughput -of TCP traffic. If the system is reporting dropped receives, this value -may be set too high, causing the driver to run out of available receive -descriptors. - -CAUTION: When setting RxIntDelay to a value other than 0, adapters may - hang (stop transmitting) under certain network conditions. If - this occurs a NETDEV WATCHDOG message is logged in the system - event log. In addition, the controller is automatically reset, - restoring the network connection. To eliminate the potential - for the hang ensure that RxIntDelay is set to 0. - -RxAbsIntDelay -------------- -(This parameter is supported only on 82540, 82545 and later adapters.) -Valid Range: 0-65535 (0=off) -Default Value: 128 - -This value, in units of 1.024 microseconds, limits the delay in which a -receive interrupt is generated. Useful only if RxIntDelay is non-zero, -this value ensures that an interrupt is generated after the initial -packet is received within the set amount of time. Proper tuning, -along with RxIntDelay, may improve traffic throughput in specific network -conditions. - -Speed ------ -(This parameter is supported only on adapters with copper connections.) -Valid Settings: 0, 10, 100, 1000 -Default Value: 0 (auto-negotiate at all supported speeds) - -Speed forces the line speed to the specified value in megabits per second -(Mbps). If this parameter is not specified or is set to 0 and the link -partner is set to auto-negotiate, the board will auto-detect the correct -speed. Duplex should also be set when Speed is set to either 10 or 100. - -TxDescriptors -------------- -Valid Range: 80-256 for 82542 and 82543-based adapters - 80-4096 for all other supported adapters -Default Value: 256 - -This value is the number of transmit descriptors allocated by the driver. -Increasing this value allows the driver to queue more transmits. Each -descriptor is 16 bytes. - -NOTE: Depending on the available system resources, the request for a - higher number of transmit descriptors may be denied. In this case, - use a lower number. - -TxDescriptorStep ----------------- -Valid Range: 1 (use every Tx Descriptor) - 4 (use every 4th Tx Descriptor) - -Default Value: 1 (use every Tx Descriptor) - -On certain non-Intel architectures, it has been observed that intense TX -traffic bursts of short packets may result in an improper descriptor -writeback. If this occurs, the driver will report a "TX Timeout" and reset -the adapter, after which the transmit flow will restart, though data may -have stalled for as much as 10 seconds before it resumes. - -The improper writeback does not occur on the first descriptor in a system -memory cache-line, which is typically 32 bytes, or 4 descriptors long. - -Setting TxDescriptorStep to a value of 4 will ensure that all TX descriptors -are aligned to the start of a system memory cache line, and so this problem -will not occur. - -NOTES: Setting TxDescriptorStep to 4 effectively reduces the number of - TxDescriptors available for transmits to 1/4 of the normal allocation. - This has a possible negative performance impact, which may be - compensated for by allocating more descriptors using the TxDescriptors - module parameter. - - There are other conditions which may result in "TX Timeout", which will - not be resolved by the use of the TxDescriptorStep parameter. As the - issue addressed by this parameter has never been observed on Intel - Architecture platforms, it should not be used on Intel platforms. - -TxIntDelay ----------- -Valid Range: 0-65535 (0=off) -Default Value: 64 - -This value delays the generation of transmit interrupts in units of -1.024 microseconds. Transmit interrupt reduction can improve CPU -efficiency if properly tuned for specific network traffic. If the -system is reporting dropped transmits, this value may be set too high -causing the driver to run out of available transmit descriptors. - -TxAbsIntDelay -------------- -(This parameter is supported only on 82540, 82545 and later adapters.) -Valid Range: 0-65535 (0=off) -Default Value: 64 - -This value, in units of 1.024 microseconds, limits the delay in which a -transmit interrupt is generated. Useful only if TxIntDelay is non-zero, -this value ensures that an interrupt is generated after the initial -packet is sent on the wire within the set amount of time. Proper tuning, -along with TxIntDelay, may improve traffic throughput in specific -network conditions. - -XsumRX ------- -(This parameter is NOT supported on the 82542-based adapter.) -Valid Range: 0-1 -Default Value: 1 - -A value of '1' indicates that the driver should enable IP checksum -offload for received packets (both UDP and TCP) to the adapter hardware. - -Copybreak ---------- -Valid Range: 0-xxxxxxx (0=off) -Default Value: 256 -Usage: insmod e1000.ko copybreak=128 - -Driver copies all packets below or equaling this size to a fresh RX -buffer before handing it up the stack. - -This parameter is different than other parameters, in that it is a -single (not 1,1,1 etc.) parameter applied to all driver instances and -it is also available during runtime at -/sys/module/e1000/parameters/copybreak - -SmartPowerDownEnable --------------------- -Valid Range: 0-1 -Default Value: 0 (disabled) - -Allows PHY to turn off in lower power states. The user can turn off -this parameter in supported chipsets. - -KumeranLockLoss ---------------- -Valid Range: 0-1 -Default Value: 1 (enabled) - -This workaround skips resetting the PHY at shutdown for the initial -silicon releases of ICH8 systems. - -Speed and Duplex Configuration -============================== - -Three keywords are used to control the speed and duplex configuration. -These keywords are Speed, Duplex, and AutoNeg. - -If the board uses a fiber interface, these keywords are ignored, and the -fiber interface board only links at 1000 Mbps full-duplex. - -For copper-based boards, the keywords interact as follows: - - The default operation is auto-negotiate. The board advertises all - supported speed and duplex combinations, and it links at the highest - common speed and duplex mode IF the link partner is set to auto-negotiate. - - If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps - is advertised (The 1000BaseT spec requires auto-negotiation.) - - If Speed = 10 or 100, then both Speed and Duplex should be set. Auto- - negotiation is disabled, and the AutoNeg parameter is ignored. Partner - SHOULD also be forced. - -The AutoNeg parameter is used when more control is required over the -auto-negotiation process. It should be used when you wish to control which -speed and duplex combinations are advertised during the auto-negotiation -process. - -The parameter may be specified as either a decimal or hexadecimal value as -determined by the bitmap below. - -Bit position 7 6 5 4 3 2 1 0 -Decimal Value 128 64 32 16 8 4 2 1 -Hex value 80 40 20 10 8 4 2 1 -Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 -Duplex Full Full Half Full Half - -Some examples of using AutoNeg: - - modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half) - modprobe e1000 AutoNeg=1 (Same as above) - modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full) - modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full) - modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half) - modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100 - Half) - modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full) - modprobe e1000 AutoNeg=32 (Same as above) - -Note that when this parameter is used, Speed and Duplex must not be specified. - -If the link partner is forced to a specific speed and duplex, then this -parameter should not be used. Instead, use the Speed and Duplex parameters -previously mentioned to force the adapter to the same speed and duplex. - -Additional Configurations -========================= - - Jumbo Frames - ------------ - Jumbo Frames support is enabled by changing the MTU to a value larger than - the default of 1500. Use the ifconfig command to increase the MTU size. - For example: - - ifconfig eth mtu 9000 up - - This setting is not saved across reboots. It can be made permanent if - you add: - - MTU=9000 - - to the file /etc/sysconfig/network-scripts/ifcfg-eth. This example - applies to the Red Hat distributions; other distributions may store this - setting in a different location. - - Notes: - Degradation in throughput performance may be observed in some Jumbo frames - environments. If this is observed, increasing the application's socket buffer - size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. - See the specific application manual and /usr/src/linux*/Documentation/ - networking/ip-sysctl.txt for more details. - - - The maximum MTU setting for Jumbo Frames is 16110. This value coincides - with the maximum Jumbo Frames size of 16128. - - - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in - poor performance or loss of link. - - - Adapters based on the Intel(R) 82542 and 82573V/E controller do not - support Jumbo Frames. These correspond to the following product names: - Intel(R) PRO/1000 Gigabit Server Adapter - Intel(R) PRO/1000 PM Network Connection - - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The ethtool - version 1.6 or later is required for this functionality. - - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool/ - - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is configured through the ethtool* utility. - - WoL will be enabled on the system during the next shut down or reboot. - For this driver version, in order to enable WoL, the e1000 driver must be - loaded when shutting down or rebooting the system. - -Support -======= - -For general information, go to the Intel support website at: - - http://support.intel.com - -or the Intel Wired Networking project hosted by Sourceforge at: - - http://sourceforge.net/projects/e1000 - -If an issue is identified with the released source code on the supported -kernel with a supported adapter, email the specific information related -to the issue to e1000-devel@lists.sf.net diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index d11a62977edd..fec8588a588e 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -11,6 +11,7 @@ Contents: can dpaa2/index e100 + e1000 kapi z8530book msg_zerocopy diff --git a/MAINTAINERS b/MAINTAINERS index d68981ca9896..32472fbf4d6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7090,7 +7090,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git S: Supported F: Documentation/networking/e100.rst -F: Documentation/networking/e1000.txt +F: Documentation/networking/e1000.rst F: Documentation/networking/e1000e.txt F: Documentation/networking/igb.txt F: Documentation/networking/igbvf.txt -- cgit From 1ec2297c5cacefd9198a3cc4a27751b6a2203cf3 Mon Sep 17 00:00:00 2001 From: Joanna Yurdal Date: Wed, 16 May 2018 13:14:11 +0200 Subject: igb: Clear TSICR interrupts together with ICR Issuing "ip link set up/down" can block TSICR interrupts, what results in missing PTP Tx timestamp and no PPS pulse generation. Problem happens when the link is set up with the TSICR interrupts pending. ICR is cleared before enabling interrupts, while TSICR is not. When all TSICR interrupts are pending at this moment, time_sync interrupt will never be generated. TSICR should be cleared as well. In order to reproduce the issue: 1. Setup linux with IEEE 1588 grandmaster and PPS output enabled 2. Continue setting link up/down with random intervals between commands 3. Wait until PPS is not generated ( only one pulse is generated and PPS dies), and ptp4l complains constantly about Tx timeout. Signed-off-by: Joanna Yurdal Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 78574c06635b..20b728218d20 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2058,6 +2058,7 @@ int igb_up(struct igb_adapter *adapter) igb_assign_vector(adapter->q_vector[0], 0); /* Clear any pending interrupts. */ + rd32(E1000_TSICR); rd32(E1000_ICR); igb_irq_enable(adapter); @@ -3865,6 +3866,7 @@ static int __igb_open(struct net_device *netdev, bool resuming) napi_enable(&(adapter->q_vector[i]->napi)); /* Clear any pending interrupts. */ + rd32(E1000_TSICR); rd32(E1000_ICR); igb_irq_enable(adapter); -- cgit From 06140c793db51476585ee9abf032845e221982cb Mon Sep 17 00:00:00 2001 From: Sergey Nemov Date: Fri, 18 May 2018 11:58:40 +0200 Subject: igb: Wait 10ms just once after TX queues reset Move 10ms sleep out of function resetting TX queue. Reset all the TX queues in one turn and wait for all of them just once. Use usleep_range() instead of mdelay() in order not to affect transmission on other interfaces. Signed-off-by: Sergey Nemov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 20b728218d20..c33821d2afb3 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4055,11 +4055,6 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, u64 tdba = ring->dma; int reg_idx = ring->reg_idx; - /* disable the queue */ - wr32(E1000_TXDCTL(reg_idx), 0); - wrfl(); - mdelay(10); - wr32(E1000_TDLEN(reg_idx), ring->count * sizeof(union e1000_adv_tx_desc)); wr32(E1000_TDBAL(reg_idx), @@ -4090,8 +4085,16 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, **/ static void igb_configure_tx(struct igb_adapter *adapter) { + struct e1000_hw *hw = &adapter->hw; int i; + /* disable the queues */ + for (i = 0; i < adapter->num_tx_queues; i++) + wr32(E1000_TXDCTL(adapter->tx_ring[i]->reg_idx), 0); + + wrfl(); + usleep_range(10000, 20000); + for (i = 0; i < adapter->num_tx_queues; i++) igb_configure_tx_ring(adapter, adapter->tx_ring[i]); } -- cgit From 4be87727d4aebe36913d9f2a6806724cb593516f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 22 May 2018 11:44:29 -0400 Subject: ixgbevf: Fix coexistence of malicious driver detection with XDP In the case of the VF driver it is supposed to provide a context descriptor that allows us to provide information about the header offsets inside of the frame. However in the case of XDP we don't really have any of that information since the data is minimally processed. As a result we were seeing malicious driver detection (MDD) events being triggered when the PF had that functionality enabled. To address this I have added a bit of new code that will "prime" the XDP ring by providing one context descriptor that assumes the minimal setup of an Ethernet frame which is an L2 header length of 14. With just that we can provide enough information to make the hardware happy so that we don't trigger MDD events. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 1 + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 36 ++++++++++++++++++----- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 70c75681495f..56a1031dcc07 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -76,6 +76,7 @@ enum ixgbevf_ring_state_t { __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, + __IXGBEVF_TX_XDP_RING_PRIMED, }; #define ring_is_xdp(ring) \ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 083041129539..2d5a706c3c29 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -991,24 +991,45 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, return IXGBEVF_XDP_CONSUMED; /* record the location of the first descriptor for this packet */ - tx_buffer = &ring->tx_buffer_info[ring->next_to_use]; - tx_buffer->bytecount = len; - tx_buffer->gso_segs = 1; - tx_buffer->protocol = 0; - i = ring->next_to_use; - tx_desc = IXGBEVF_TX_DESC(ring, i); + tx_buffer = &ring->tx_buffer_info[i]; dma_unmap_len_set(tx_buffer, len, len); dma_unmap_addr_set(tx_buffer, dma, dma); tx_buffer->data = xdp->data; - tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_buffer->bytecount = len; + tx_buffer->gso_segs = 1; + tx_buffer->protocol = 0; + + /* Populate minimal context descriptor that will provide for the + * fact that we are expected to process Ethernet frames. + */ + if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) { + struct ixgbe_adv_tx_context_desc *context_desc; + + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); + + context_desc = IXGBEVF_TX_CTXTDESC(ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->seqnum_seed = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + i = 1; + } /* put descriptor type bits */ cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DCMD_IFCS; cmd_type |= len | IXGBE_TXD_CMD; + + tx_desc = IXGBEVF_TX_DESC(ring, i); + tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); tx_desc->read.olinfo_status = cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | @@ -1688,6 +1709,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, sizeof(struct ixgbevf_tx_buffer) * ring->count); clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); + clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), txdctl); -- cgit From 7d6446db1bcb1008b84db098f4629664cd8b06a6 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Tue, 22 May 2018 09:18:46 -0700 Subject: ixgbevf: fix possible race in the reset subtask Extend the RTNL lock in ixgbevf_reset_subtask() to protect the state bits check in addition to the call to ixgbevf_reinit_locked(). This is to make sure that we get the most up-to-date values for the bits and avoid a possible race when going down. Suggested-by: Zhiping du Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 2d5a706c3c29..59416eddd840 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3141,15 +3141,17 @@ static void ixgbevf_reset_subtask(struct ixgbevf_adapter *adapter) if (!test_and_clear_bit(__IXGBEVF_RESET_REQUESTED, &adapter->state)) return; + rtnl_lock(); /* If we're already down or resetting, just bail */ if (test_bit(__IXGBEVF_DOWN, &adapter->state) || test_bit(__IXGBEVF_REMOVING, &adapter->state) || - test_bit(__IXGBEVF_RESETTING, &adapter->state)) + test_bit(__IXGBEVF_RESETTING, &adapter->state)) { + rtnl_unlock(); return; + } adapter->tx_timeout_count++; - rtnl_lock(); ixgbevf_reinit_locked(adapter); rtnl_unlock(); } -- cgit From e9c721837da2a73798243ded051ff0ffdbc6066b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 23 May 2018 20:08:13 +0800 Subject: ixgbe: introduce a helper to simplify code ixgbe_dbg_reg_ops_read and ixgbe_dbg_netdev_ops_read copy-pasting the same code except for ixgbe_dbg_netdev_ops_buf/ixgbe_dbg_reg_ops_buf, so introduce a helper ixgbe_dbg_common_ops_read to remove redundant code. Signed-off-by: YueHaibing Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c | 57 +++++++++--------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index 55fe8114fe99..50dfb02fa34c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -10,15 +10,9 @@ static struct dentry *ixgbe_dbg_root; static char ixgbe_dbg_reg_ops_buf[256] = ""; -/** - * ixgbe_dbg_reg_ops_read - read for reg_ops datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t ixgbe_dbg_common_ops_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos, + char *dbg_buf) { struct ixgbe_adapter *adapter = filp->private_data; char *buf; @@ -29,8 +23,7 @@ static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, return 0; buf = kasprintf(GFP_KERNEL, "%s: %s\n", - adapter->netdev->name, - ixgbe_dbg_reg_ops_buf); + adapter->netdev->name, dbg_buf); if (!buf) return -ENOMEM; @@ -45,6 +38,20 @@ static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, return len; } +/** + * ixgbe_dbg_reg_ops_read - read for reg_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + return ixgbe_dbg_common_ops_read(filp, buffer, count, ppos, + ixgbe_dbg_reg_ops_buf); +} + /** * ixgbe_dbg_reg_ops_write - write into reg_ops datum * @filp: the opened file @@ -121,33 +128,11 @@ static char ixgbe_dbg_netdev_ops_buf[256] = ""; * @count: the size of the user's buffer * @ppos: file position offset **/ -static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp, - char __user *buffer, +static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - struct ixgbe_adapter *adapter = filp->private_data; - char *buf; - int len; - - /* don't allow partial reads */ - if (*ppos != 0) - return 0; - - buf = kasprintf(GFP_KERNEL, "%s: %s\n", - adapter->netdev->name, - ixgbe_dbg_netdev_ops_buf); - if (!buf) - return -ENOMEM; - - if (count < strlen(buf)) { - kfree(buf); - return -ENOSPC; - } - - len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); - - kfree(buf); - return len; + return ixgbe_dbg_common_ops_read(filp, buffer, count, ppos, + ixgbe_dbg_netdev_ops_buf); } /** -- cgit From cc5b114dcf986bfd8e4c37bf65d1b7b1e5290ac6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 May 2018 11:07:20 +0200 Subject: bpf, i40e: add meta data support Add support for XDP meta data when using build skb variant of the i40e driver. Implementation is analogous to the existing ixgbe and ixgbevf support for meta data from 366a88fe2f40 ("bpf, ixgbe: add meta data support") and be8333322eff ("ixgbevf: Add support for meta data"). With the build skb variant we get 192 bytes of extra headroom which can be used for encaps or meta data. Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Tested-by: John Fastabend Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 39 +++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 9b698c5acd05..105a26f447c0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2032,6 +2032,21 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, #if L1_CACHE_BYTES < 128 prefetch(xdp->data + L1_CACHE_BYTES); #endif + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via i40e_build_skb(). The latter + * provides us currently with 192 bytes of headroom. + * + * For i40e_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * change in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. + */ /* allocate a skb to store the frags */ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, @@ -2083,19 +2098,25 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, struct xdp_buff *xdp) { - unsigned int size = xdp->data_end - xdp->data; + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(I40E_SKB_PAD + size); + SKB_DATA_ALIGN(I40E_SKB_PAD + + (xdp->data_end - + xdp->data_hard_start)); #endif struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(xdp->data); + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points exactly as xdp->data, otherwise we + * likely have a consumer accessing first few bytes of meta + * data, and then actual data. + */ + prefetch(xdp->data_meta); #if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); + prefetch(xdp->data_meta + L1_CACHE_BYTES); #endif /* build an skb around the page buffer */ skb = build_skb(xdp->data_hard_start, truesize); @@ -2103,8 +2124,10 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, I40E_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, I40E_SKB_PAD + (xdp->data - xdp->data_hard_start)); + __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); /* buffer is used by skb, update page_offset */ #if (PAGE_SIZE < 8192) @@ -2341,7 +2364,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; - xdp_set_data_meta_invalid(&xdp); + xdp.data_meta = xdp.data; xdp.data_hard_start = xdp.data - i40e_rx_offset(rx_ring); xdp.data_end = xdp.data + size; -- cgit From 88adce4ea8f96b5191df2bea76905814cc3814e2 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Wed, 30 May 2018 09:05:12 -0700 Subject: ixgbe: fix possible race in reset subtask Similar to ixgbevf, the same possibility for race exists. Extend the RTNL lock in ixgbe_reset_subtask() to protect the state bits; this is to make sure that we get the most up-to-date values for the bits and avoid a possible race when going down. Signed-off-by: Tony Nguyen Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ba3035c08572..dd8a3a037c2f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7621,17 +7621,19 @@ static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state)) return; + rtnl_lock(); /* If we're already down, removing or resetting, just bail */ if (test_bit(__IXGBE_DOWN, &adapter->state) || test_bit(__IXGBE_REMOVING, &adapter->state) || - test_bit(__IXGBE_RESETTING, &adapter->state)) + test_bit(__IXGBE_RESETTING, &adapter->state)) { + rtnl_unlock(); return; + } ixgbe_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); adapter->tx_timeout_count++; - rtnl_lock(); ixgbe_reinit_locked(adapter); rtnl_unlock(); } -- cgit From 2a8a15526d319eed948bb38ae9f5900e2bee8565 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 30 May 2018 11:20:04 -0700 Subject: ixgbe: check ipsec ip addr against mgmt filters Make sure we don't try to offload the decryption of an incoming packet that should get delivered to the management engine. This is a corner case that will likely be very seldom seen, but could really confuse someone if they were to hit it. Suggested-by: Jesse Brandeburg Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 88 ++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 99b170f1efd1..e1c976271bbd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -444,6 +444,89 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, return 0; } +/** + * ixgbe_ipsec_check_mgmt_ip - make sure there is no clash with mgmt IP filters + * @xs: pointer to transformer state struct + **/ +static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) +{ + struct net_device *dev = xs->xso.dev; + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct ixgbe_hw *hw = &adapter->hw; + u32 mfval, manc, reg; + int num_filters = 4; + bool manc_ipv4; + u32 bmcipval; + int i, j; + +#define MANC_EN_IPV4_FILTER BIT(24) +#define MFVAL_IPV4_FILTER_SHIFT 16 +#define MFVAL_IPV6_FILTER_SHIFT 24 +#define MIPAF_ARR(_m, _n) (IXGBE_MIPAF + ((_m) * 0x10) + ((_n) * 4)) + +#define IXGBE_BMCIP(_n) (0x5050 + ((_n) * 4)) +#define IXGBE_BMCIPVAL 0x5060 +#define BMCIP_V4 0x2 +#define BMCIP_V6 0x3 +#define BMCIP_MASK 0x3 + + manc = IXGBE_READ_REG(hw, IXGBE_MANC); + manc_ipv4 = !!(manc & MANC_EN_IPV4_FILTER); + mfval = IXGBE_READ_REG(hw, IXGBE_MFVAL); + bmcipval = IXGBE_READ_REG(hw, IXGBE_BMCIPVAL); + + if (xs->props.family == AF_INET) { + /* are there any IPv4 filters to check? */ + if (manc_ipv4) { + /* the 4 ipv4 filters are all in MIPAF(3, i) */ + for (i = 0; i < num_filters; i++) { + if (!(mfval & BIT(MFVAL_IPV4_FILTER_SHIFT + i))) + continue; + + reg = IXGBE_READ_REG(hw, MIPAF_ARR(3, i)); + if (reg == xs->id.daddr.a4) + return 1; + } + } + + if ((bmcipval & BMCIP_MASK) == BMCIP_V4) { + reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(3)); + if (reg == xs->id.daddr.a4) + return 1; + } + + } else { + /* if there are ipv4 filters, they are in the last ipv6 slot */ + if (manc_ipv4) + num_filters = 3; + + for (i = 0; i < num_filters; i++) { + if (!(mfval & BIT(MFVAL_IPV6_FILTER_SHIFT + i))) + continue; + + for (j = 0; j < 4; j++) { + reg = IXGBE_READ_REG(hw, MIPAF_ARR(i, j)); + if (reg != xs->id.daddr.a6[j]) + break; + } + if (j == 4) /* did we match all 4 words? */ + return 1; + } + + if ((bmcipval & BMCIP_MASK) == BMCIP_V6) { + for (j = 0; j < 4; j++) { + reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(j)); + if (reg != xs->id.daddr.a6[j]) + break; + } + if (j == 4) /* did we match all 4 words? */ + return 1; + } + } + + return 0; +} + /** * ixgbe_ipsec_add_sa - program device with a security association * @xs: pointer to transformer state struct @@ -465,6 +548,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (ixgbe_ipsec_check_mgmt_ip(xs)) { + netdev_err(dev, "IPsec IP addr clash with mgmt filters\n"); + return -EINVAL; + } + if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { struct rx_sa rsa; -- cgit From 9a75fa5c166346fa3de35fe8020e43bc98a171d0 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 31 May 2018 14:12:18 -0700 Subject: ixgbe: fix broken ipsec Rx with proper cast on spi Fix up a cast problem introduced by a sparse cleanup patch. This fixes a problem where the encrypted packets were not recognized on Rx and subsequently dropped. Fixes: 9cfbfa701b55 ("ixgbe: cleanup sparse warnings") Signed-off-by: Shannon Nelson Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index e1c976271bbd..344a1f213a5f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -663,7 +663,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) /* hash the new entry for faster search in Rx path */ hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist, - (__force u64)rsa.xs->id.spi); + (__force u32)rsa.xs->id.spi); } else { struct tx_sa tsa; -- cgit